{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999534955820803, "eval_steps": 500, "global_step": 6450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004650441791970237, "grad_norm": 6.0558061599731445, "learning_rate": 1.550387596899225e-08, "loss": 0.8113, "step": 1 }, { "epoch": 0.0009300883583940474, "grad_norm": 6.12605619430542, "learning_rate": 3.10077519379845e-08, "loss": 0.825, "step": 2 }, { "epoch": 0.0013951325375910712, "grad_norm": 6.057300567626953, "learning_rate": 4.6511627906976744e-08, "loss": 0.8147, "step": 3 }, { "epoch": 0.0018601767167880949, "grad_norm": 6.23735237121582, "learning_rate": 6.2015503875969e-08, "loss": 0.8659, "step": 4 }, { "epoch": 0.002325220895985119, "grad_norm": 5.970339298248291, "learning_rate": 7.751937984496124e-08, "loss": 0.7994, "step": 5 }, { "epoch": 0.0027902650751821423, "grad_norm": 6.014130592346191, "learning_rate": 9.302325581395349e-08, "loss": 0.7927, "step": 6 }, { "epoch": 0.0032553092543791662, "grad_norm": 6.405108451843262, "learning_rate": 1.0852713178294575e-07, "loss": 0.8445, "step": 7 }, { "epoch": 0.0037203534335761897, "grad_norm": 5.915679931640625, "learning_rate": 1.24031007751938e-07, "loss": 0.7874, "step": 8 }, { "epoch": 0.004185397612773214, "grad_norm": 6.164096355438232, "learning_rate": 1.3953488372093024e-07, "loss": 0.841, "step": 9 }, { "epoch": 0.004650441791970238, "grad_norm": 6.06751012802124, "learning_rate": 1.5503875968992249e-07, "loss": 0.8139, "step": 10 }, { "epoch": 0.005115485971167261, "grad_norm": 6.07175350189209, "learning_rate": 1.7054263565891473e-07, "loss": 0.8137, "step": 11 }, { "epoch": 0.005580530150364285, "grad_norm": 6.381070613861084, "learning_rate": 1.8604651162790698e-07, "loss": 0.8233, "step": 12 }, { "epoch": 0.0060455743295613085, "grad_norm": 6.221768379211426, "learning_rate": 2.0155038759689925e-07, "loss": 0.8265, "step": 13 }, { "epoch": 0.0065106185087583325, "grad_norm": 5.883142471313477, "learning_rate": 2.170542635658915e-07, "loss": 0.7789, "step": 14 }, { "epoch": 0.0069756626879553555, "grad_norm": 5.981773853302002, "learning_rate": 2.3255813953488374e-07, "loss": 0.7948, "step": 15 }, { "epoch": 0.0074407068671523795, "grad_norm": 5.9482035636901855, "learning_rate": 2.48062015503876e-07, "loss": 0.782, "step": 16 }, { "epoch": 0.007905751046349403, "grad_norm": 6.413148403167725, "learning_rate": 2.6356589147286826e-07, "loss": 0.8528, "step": 17 }, { "epoch": 0.008370795225546427, "grad_norm": 5.644394397735596, "learning_rate": 2.790697674418605e-07, "loss": 0.8007, "step": 18 }, { "epoch": 0.00883583940474345, "grad_norm": 5.608232021331787, "learning_rate": 2.9457364341085275e-07, "loss": 0.7642, "step": 19 }, { "epoch": 0.009300883583940475, "grad_norm": 5.738401889801025, "learning_rate": 3.1007751937984497e-07, "loss": 0.8086, "step": 20 }, { "epoch": 0.009765927763137498, "grad_norm": 5.834144115447998, "learning_rate": 3.2558139534883724e-07, "loss": 0.8128, "step": 21 }, { "epoch": 0.010230971942334521, "grad_norm": 5.556512355804443, "learning_rate": 3.4108527131782946e-07, "loss": 0.7919, "step": 22 }, { "epoch": 0.010696016121531546, "grad_norm": 5.6250200271606445, "learning_rate": 3.5658914728682174e-07, "loss": 0.7924, "step": 23 }, { "epoch": 0.01116106030072857, "grad_norm": 4.8199992179870605, "learning_rate": 3.7209302325581396e-07, "loss": 0.7686, "step": 24 }, { "epoch": 0.011626104479925592, "grad_norm": 4.794100761413574, "learning_rate": 3.8759689922480623e-07, "loss": 0.7635, "step": 25 }, { "epoch": 0.012091148659122617, "grad_norm": 4.6750874519348145, "learning_rate": 4.031007751937985e-07, "loss": 0.748, "step": 26 }, { "epoch": 0.01255619283831964, "grad_norm": 4.775611877441406, "learning_rate": 4.186046511627907e-07, "loss": 0.7594, "step": 27 }, { "epoch": 0.013021237017516665, "grad_norm": 4.403256893157959, "learning_rate": 4.34108527131783e-07, "loss": 0.7302, "step": 28 }, { "epoch": 0.013486281196713688, "grad_norm": 4.565180778503418, "learning_rate": 4.496124031007752e-07, "loss": 0.7729, "step": 29 }, { "epoch": 0.013951325375910711, "grad_norm": 4.50689697265625, "learning_rate": 4.651162790697675e-07, "loss": 0.7524, "step": 30 }, { "epoch": 0.014416369555107736, "grad_norm": 4.400961875915527, "learning_rate": 4.806201550387598e-07, "loss": 0.7466, "step": 31 }, { "epoch": 0.014881413734304759, "grad_norm": 4.230681419372559, "learning_rate": 4.96124031007752e-07, "loss": 0.7352, "step": 32 }, { "epoch": 0.015346457913501782, "grad_norm": 3.3713021278381348, "learning_rate": 5.116279069767442e-07, "loss": 0.7095, "step": 33 }, { "epoch": 0.015811502092698805, "grad_norm": 2.6536169052124023, "learning_rate": 5.271317829457365e-07, "loss": 0.7099, "step": 34 }, { "epoch": 0.01627654627189583, "grad_norm": 2.5625736713409424, "learning_rate": 5.426356589147287e-07, "loss": 0.7209, "step": 35 }, { "epoch": 0.016741590451092855, "grad_norm": 2.7283709049224854, "learning_rate": 5.58139534883721e-07, "loss": 0.7052, "step": 36 }, { "epoch": 0.017206634630289878, "grad_norm": 2.3967738151550293, "learning_rate": 5.736434108527132e-07, "loss": 0.6705, "step": 37 }, { "epoch": 0.0176716788094869, "grad_norm": 2.436204195022583, "learning_rate": 5.891472868217055e-07, "loss": 0.726, "step": 38 }, { "epoch": 0.018136722988683924, "grad_norm": 2.3361918926239014, "learning_rate": 6.046511627906977e-07, "loss": 0.6694, "step": 39 }, { "epoch": 0.01860176716788095, "grad_norm": 2.221872568130493, "learning_rate": 6.201550387596899e-07, "loss": 0.6728, "step": 40 }, { "epoch": 0.019066811347077973, "grad_norm": 2.210745096206665, "learning_rate": 6.356589147286822e-07, "loss": 0.6902, "step": 41 }, { "epoch": 0.019531855526274997, "grad_norm": 2.1230080127716064, "learning_rate": 6.511627906976745e-07, "loss": 0.697, "step": 42 }, { "epoch": 0.01999689970547202, "grad_norm": 2.061232089996338, "learning_rate": 6.666666666666667e-07, "loss": 0.6746, "step": 43 }, { "epoch": 0.020461943884669043, "grad_norm": 1.6226105690002441, "learning_rate": 6.821705426356589e-07, "loss": 0.6732, "step": 44 }, { "epoch": 0.020926988063866066, "grad_norm": 1.4576574563980103, "learning_rate": 6.976744186046513e-07, "loss": 0.6821, "step": 45 }, { "epoch": 0.021392032243063092, "grad_norm": 1.4970301389694214, "learning_rate": 7.131782945736435e-07, "loss": 0.6661, "step": 46 }, { "epoch": 0.021857076422260115, "grad_norm": 1.6816171407699585, "learning_rate": 7.286821705426357e-07, "loss": 0.671, "step": 47 }, { "epoch": 0.02232212060145714, "grad_norm": 1.7516964673995972, "learning_rate": 7.441860465116279e-07, "loss": 0.5983, "step": 48 }, { "epoch": 0.02278716478065416, "grad_norm": 1.8953578472137451, "learning_rate": 7.596899224806202e-07, "loss": 0.6441, "step": 49 }, { "epoch": 0.023252208959851185, "grad_norm": 1.8705769777297974, "learning_rate": 7.751937984496125e-07, "loss": 0.6332, "step": 50 }, { "epoch": 0.02371725313904821, "grad_norm": 1.9592753648757935, "learning_rate": 7.906976744186047e-07, "loss": 0.6445, "step": 51 }, { "epoch": 0.024182297318245234, "grad_norm": 1.7867804765701294, "learning_rate": 8.06201550387597e-07, "loss": 0.6449, "step": 52 }, { "epoch": 0.024647341497442257, "grad_norm": 1.7160060405731201, "learning_rate": 8.217054263565892e-07, "loss": 0.6299, "step": 53 }, { "epoch": 0.02511238567663928, "grad_norm": 1.3905798196792603, "learning_rate": 8.372093023255814e-07, "loss": 0.5903, "step": 54 }, { "epoch": 0.025577429855836303, "grad_norm": 1.2955713272094727, "learning_rate": 8.527131782945737e-07, "loss": 0.6332, "step": 55 }, { "epoch": 0.02604247403503333, "grad_norm": 1.2509273290634155, "learning_rate": 8.68217054263566e-07, "loss": 0.6233, "step": 56 }, { "epoch": 0.026507518214230353, "grad_norm": 1.0419048070907593, "learning_rate": 8.837209302325582e-07, "loss": 0.5845, "step": 57 }, { "epoch": 0.026972562393427376, "grad_norm": 1.083308458328247, "learning_rate": 8.992248062015504e-07, "loss": 0.6179, "step": 58 }, { "epoch": 0.0274376065726244, "grad_norm": 1.011205792427063, "learning_rate": 9.147286821705427e-07, "loss": 0.6055, "step": 59 }, { "epoch": 0.027902650751821422, "grad_norm": 0.9679791927337646, "learning_rate": 9.30232558139535e-07, "loss": 0.549, "step": 60 }, { "epoch": 0.028367694931018445, "grad_norm": 0.9358096718788147, "learning_rate": 9.457364341085272e-07, "loss": 0.5544, "step": 61 }, { "epoch": 0.028832739110215472, "grad_norm": 0.8703131079673767, "learning_rate": 9.612403100775195e-07, "loss": 0.5859, "step": 62 }, { "epoch": 0.029297783289412495, "grad_norm": 0.7809782028198242, "learning_rate": 9.767441860465117e-07, "loss": 0.5716, "step": 63 }, { "epoch": 0.029762827468609518, "grad_norm": 0.7614837884902954, "learning_rate": 9.92248062015504e-07, "loss": 0.5784, "step": 64 }, { "epoch": 0.03022787164780654, "grad_norm": 0.7258252501487732, "learning_rate": 1.0077519379844962e-06, "loss": 0.5985, "step": 65 }, { "epoch": 0.030692915827003564, "grad_norm": 0.7365714907646179, "learning_rate": 1.0232558139534884e-06, "loss": 0.5962, "step": 66 }, { "epoch": 0.03115796000620059, "grad_norm": 0.7005903124809265, "learning_rate": 1.0387596899224806e-06, "loss": 0.5533, "step": 67 }, { "epoch": 0.03162300418539761, "grad_norm": 0.7548089623451233, "learning_rate": 1.054263565891473e-06, "loss": 0.6215, "step": 68 }, { "epoch": 0.03208804836459463, "grad_norm": 0.6190081238746643, "learning_rate": 1.0697674418604653e-06, "loss": 0.5489, "step": 69 }, { "epoch": 0.03255309254379166, "grad_norm": 0.7042703628540039, "learning_rate": 1.0852713178294575e-06, "loss": 0.5756, "step": 70 }, { "epoch": 0.033018136722988686, "grad_norm": 0.6290520429611206, "learning_rate": 1.1007751937984497e-06, "loss": 0.5925, "step": 71 }, { "epoch": 0.03348318090218571, "grad_norm": 0.5906243920326233, "learning_rate": 1.116279069767442e-06, "loss": 0.5618, "step": 72 }, { "epoch": 0.03394822508138273, "grad_norm": 0.5369678735733032, "learning_rate": 1.1317829457364341e-06, "loss": 0.555, "step": 73 }, { "epoch": 0.034413269260579755, "grad_norm": 0.5198186039924622, "learning_rate": 1.1472868217054264e-06, "loss": 0.534, "step": 74 }, { "epoch": 0.03487831343977678, "grad_norm": 0.5380486845970154, "learning_rate": 1.1627906976744188e-06, "loss": 0.5528, "step": 75 }, { "epoch": 0.0353433576189738, "grad_norm": 0.5507330298423767, "learning_rate": 1.178294573643411e-06, "loss": 0.5608, "step": 76 }, { "epoch": 0.035808401798170825, "grad_norm": 0.5470713376998901, "learning_rate": 1.1937984496124032e-06, "loss": 0.549, "step": 77 }, { "epoch": 0.03627344597736785, "grad_norm": 0.5553979277610779, "learning_rate": 1.2093023255813954e-06, "loss": 0.5203, "step": 78 }, { "epoch": 0.03673849015656487, "grad_norm": 0.5427577495574951, "learning_rate": 1.2248062015503877e-06, "loss": 0.5348, "step": 79 }, { "epoch": 0.0372035343357619, "grad_norm": 0.5340808629989624, "learning_rate": 1.2403100775193799e-06, "loss": 0.5373, "step": 80 }, { "epoch": 0.037668578514958924, "grad_norm": 0.5276907682418823, "learning_rate": 1.2558139534883723e-06, "loss": 0.5403, "step": 81 }, { "epoch": 0.03813362269415595, "grad_norm": 0.4982495605945587, "learning_rate": 1.2713178294573643e-06, "loss": 0.5283, "step": 82 }, { "epoch": 0.03859866687335297, "grad_norm": 0.48218581080436707, "learning_rate": 1.2868217054263568e-06, "loss": 0.5499, "step": 83 }, { "epoch": 0.03906371105254999, "grad_norm": 0.4612601399421692, "learning_rate": 1.302325581395349e-06, "loss": 0.5276, "step": 84 }, { "epoch": 0.039528755231747016, "grad_norm": 0.48117271065711975, "learning_rate": 1.3178294573643414e-06, "loss": 0.5462, "step": 85 }, { "epoch": 0.03999379941094404, "grad_norm": 0.464334636926651, "learning_rate": 1.3333333333333334e-06, "loss": 0.5255, "step": 86 }, { "epoch": 0.04045884359014106, "grad_norm": 0.5120902061462402, "learning_rate": 1.3488372093023258e-06, "loss": 0.553, "step": 87 }, { "epoch": 0.040923887769338085, "grad_norm": 0.5096696019172668, "learning_rate": 1.3643410852713179e-06, "loss": 0.5242, "step": 88 }, { "epoch": 0.04138893194853511, "grad_norm": 0.44623059034347534, "learning_rate": 1.3798449612403103e-06, "loss": 0.523, "step": 89 }, { "epoch": 0.04185397612773213, "grad_norm": 0.47654956579208374, "learning_rate": 1.3953488372093025e-06, "loss": 0.5335, "step": 90 }, { "epoch": 0.04231902030692916, "grad_norm": 0.4882866144180298, "learning_rate": 1.4108527131782947e-06, "loss": 0.5176, "step": 91 }, { "epoch": 0.042784064486126185, "grad_norm": 0.46192339062690735, "learning_rate": 1.426356589147287e-06, "loss": 0.545, "step": 92 }, { "epoch": 0.04324910866532321, "grad_norm": 0.4416024684906006, "learning_rate": 1.4418604651162794e-06, "loss": 0.526, "step": 93 }, { "epoch": 0.04371415284452023, "grad_norm": 0.45274990797042847, "learning_rate": 1.4573643410852714e-06, "loss": 0.5233, "step": 94 }, { "epoch": 0.044179197023717254, "grad_norm": 0.4544108808040619, "learning_rate": 1.4728682170542638e-06, "loss": 0.5118, "step": 95 }, { "epoch": 0.04464424120291428, "grad_norm": 0.4827730655670166, "learning_rate": 1.4883720930232558e-06, "loss": 0.5087, "step": 96 }, { "epoch": 0.0451092853821113, "grad_norm": 0.45739415287971497, "learning_rate": 1.5038759689922483e-06, "loss": 0.5058, "step": 97 }, { "epoch": 0.04557432956130832, "grad_norm": 0.4581778645515442, "learning_rate": 1.5193798449612405e-06, "loss": 0.5272, "step": 98 }, { "epoch": 0.046039373740505346, "grad_norm": 0.43706098198890686, "learning_rate": 1.534883720930233e-06, "loss": 0.5205, "step": 99 }, { "epoch": 0.04650441791970237, "grad_norm": 0.43774181604385376, "learning_rate": 1.550387596899225e-06, "loss": 0.5033, "step": 100 }, { "epoch": 0.04696946209889939, "grad_norm": 0.42946186661720276, "learning_rate": 1.5658914728682173e-06, "loss": 0.5567, "step": 101 }, { "epoch": 0.04743450627809642, "grad_norm": 0.4620288908481598, "learning_rate": 1.5813953488372093e-06, "loss": 0.4879, "step": 102 }, { "epoch": 0.047899550457293445, "grad_norm": 0.4660142958164215, "learning_rate": 1.5968992248062018e-06, "loss": 0.5373, "step": 103 }, { "epoch": 0.04836459463649047, "grad_norm": 0.4969952404499054, "learning_rate": 1.612403100775194e-06, "loss": 0.5156, "step": 104 }, { "epoch": 0.04882963881568749, "grad_norm": 0.4132183790206909, "learning_rate": 1.6279069767441862e-06, "loss": 0.496, "step": 105 }, { "epoch": 0.049294682994884514, "grad_norm": 0.433496356010437, "learning_rate": 1.6434108527131784e-06, "loss": 0.5313, "step": 106 }, { "epoch": 0.04975972717408154, "grad_norm": 0.4335452914237976, "learning_rate": 1.6589147286821709e-06, "loss": 0.5167, "step": 107 }, { "epoch": 0.05022477135327856, "grad_norm": 0.43790504336357117, "learning_rate": 1.6744186046511629e-06, "loss": 0.4973, "step": 108 }, { "epoch": 0.050689815532475584, "grad_norm": 0.4186980724334717, "learning_rate": 1.6899224806201553e-06, "loss": 0.4954, "step": 109 }, { "epoch": 0.05115485971167261, "grad_norm": 0.40744200348854065, "learning_rate": 1.7054263565891473e-06, "loss": 0.4927, "step": 110 }, { "epoch": 0.05161990389086963, "grad_norm": 0.43091249465942383, "learning_rate": 1.7209302325581397e-06, "loss": 0.5062, "step": 111 }, { "epoch": 0.05208494807006666, "grad_norm": 0.4677499830722809, "learning_rate": 1.736434108527132e-06, "loss": 0.5107, "step": 112 }, { "epoch": 0.05254999224926368, "grad_norm": 0.42137694358825684, "learning_rate": 1.7519379844961242e-06, "loss": 0.5132, "step": 113 }, { "epoch": 0.053015036428460706, "grad_norm": 0.4344700872898102, "learning_rate": 1.7674418604651164e-06, "loss": 0.4793, "step": 114 }, { "epoch": 0.05348008060765773, "grad_norm": 0.4326712489128113, "learning_rate": 1.7829457364341088e-06, "loss": 0.4715, "step": 115 }, { "epoch": 0.05394512478685475, "grad_norm": 0.4487822949886322, "learning_rate": 1.7984496124031008e-06, "loss": 0.5165, "step": 116 }, { "epoch": 0.054410168966051775, "grad_norm": 0.46336719393730164, "learning_rate": 1.8139534883720933e-06, "loss": 0.512, "step": 117 }, { "epoch": 0.0548752131452488, "grad_norm": 0.4590426981449127, "learning_rate": 1.8294573643410855e-06, "loss": 0.5036, "step": 118 }, { "epoch": 0.05534025732444582, "grad_norm": 0.4003012478351593, "learning_rate": 1.8449612403100777e-06, "loss": 0.4894, "step": 119 }, { "epoch": 0.055805301503642844, "grad_norm": 0.45950761437416077, "learning_rate": 1.86046511627907e-06, "loss": 0.5109, "step": 120 }, { "epoch": 0.05627034568283987, "grad_norm": 0.43410784006118774, "learning_rate": 1.8759689922480624e-06, "loss": 0.4966, "step": 121 }, { "epoch": 0.05673538986203689, "grad_norm": 0.45534461736679077, "learning_rate": 1.8914728682170544e-06, "loss": 0.5311, "step": 122 }, { "epoch": 0.05720043404123392, "grad_norm": 0.44469308853149414, "learning_rate": 1.9069767441860468e-06, "loss": 0.4967, "step": 123 }, { "epoch": 0.057665478220430944, "grad_norm": 0.4573187232017517, "learning_rate": 1.922480620155039e-06, "loss": 0.4864, "step": 124 }, { "epoch": 0.05813052239962797, "grad_norm": 0.45671388506889343, "learning_rate": 1.9379844961240315e-06, "loss": 0.5046, "step": 125 }, { "epoch": 0.05859556657882499, "grad_norm": 0.4924532473087311, "learning_rate": 1.9534883720930235e-06, "loss": 0.5474, "step": 126 }, { "epoch": 0.05906061075802201, "grad_norm": 0.45947206020355225, "learning_rate": 1.968992248062016e-06, "loss": 0.463, "step": 127 }, { "epoch": 0.059525654937219036, "grad_norm": 0.4114924967288971, "learning_rate": 1.984496124031008e-06, "loss": 0.5045, "step": 128 }, { "epoch": 0.05999069911641606, "grad_norm": 0.424427330493927, "learning_rate": 2.0000000000000003e-06, "loss": 0.4865, "step": 129 }, { "epoch": 0.06045574329561308, "grad_norm": 0.457966685295105, "learning_rate": 2.0155038759689923e-06, "loss": 0.492, "step": 130 }, { "epoch": 0.060920787474810105, "grad_norm": 0.4255077540874481, "learning_rate": 2.0310077519379848e-06, "loss": 0.4961, "step": 131 }, { "epoch": 0.06138583165400713, "grad_norm": 0.39936286211013794, "learning_rate": 2.0465116279069768e-06, "loss": 0.4595, "step": 132 }, { "epoch": 0.06185087583320415, "grad_norm": 0.39685145020484924, "learning_rate": 2.062015503875969e-06, "loss": 0.4645, "step": 133 }, { "epoch": 0.06231592001240118, "grad_norm": 0.44578003883361816, "learning_rate": 2.0775193798449612e-06, "loss": 0.4848, "step": 134 }, { "epoch": 0.0627809641915982, "grad_norm": 0.4608939290046692, "learning_rate": 2.0930232558139536e-06, "loss": 0.444, "step": 135 }, { "epoch": 0.06324600837079522, "grad_norm": 0.43158158659935, "learning_rate": 2.108527131782946e-06, "loss": 0.4781, "step": 136 }, { "epoch": 0.06371105254999225, "grad_norm": 0.43560659885406494, "learning_rate": 2.124031007751938e-06, "loss": 0.4768, "step": 137 }, { "epoch": 0.06417609672918927, "grad_norm": 0.39161673188209534, "learning_rate": 2.1395348837209305e-06, "loss": 0.4824, "step": 138 }, { "epoch": 0.0646411409083863, "grad_norm": 0.492993026971817, "learning_rate": 2.155038759689923e-06, "loss": 0.4708, "step": 139 }, { "epoch": 0.06510618508758333, "grad_norm": 0.4824112057685852, "learning_rate": 2.170542635658915e-06, "loss": 0.5064, "step": 140 }, { "epoch": 0.06557122926678034, "grad_norm": 0.42813244462013245, "learning_rate": 2.1860465116279074e-06, "loss": 0.4815, "step": 141 }, { "epoch": 0.06603627344597737, "grad_norm": 0.43404629826545715, "learning_rate": 2.2015503875968994e-06, "loss": 0.4808, "step": 142 }, { "epoch": 0.06650131762517439, "grad_norm": 0.44562390446662903, "learning_rate": 2.217054263565892e-06, "loss": 0.495, "step": 143 }, { "epoch": 0.06696636180437142, "grad_norm": 0.43140050768852234, "learning_rate": 2.232558139534884e-06, "loss": 0.4634, "step": 144 }, { "epoch": 0.06743140598356843, "grad_norm": 0.43824145197868347, "learning_rate": 2.2480620155038763e-06, "loss": 0.5077, "step": 145 }, { "epoch": 0.06789645016276546, "grad_norm": 0.45454517006874084, "learning_rate": 2.2635658914728683e-06, "loss": 0.4605, "step": 146 }, { "epoch": 0.06836149434196248, "grad_norm": 0.4356607496738434, "learning_rate": 2.2790697674418607e-06, "loss": 0.5132, "step": 147 }, { "epoch": 0.06882653852115951, "grad_norm": 0.4299354553222656, "learning_rate": 2.2945736434108527e-06, "loss": 0.4709, "step": 148 }, { "epoch": 0.06929158270035653, "grad_norm": 0.4432096481323242, "learning_rate": 2.310077519379845e-06, "loss": 0.4743, "step": 149 }, { "epoch": 0.06975662687955356, "grad_norm": 0.4662824273109436, "learning_rate": 2.3255813953488376e-06, "loss": 0.5184, "step": 150 }, { "epoch": 0.07022167105875059, "grad_norm": 0.4367971420288086, "learning_rate": 2.3410852713178296e-06, "loss": 0.4778, "step": 151 }, { "epoch": 0.0706867152379476, "grad_norm": 0.4500759243965149, "learning_rate": 2.356589147286822e-06, "loss": 0.4534, "step": 152 }, { "epoch": 0.07115175941714463, "grad_norm": 0.4312923848628998, "learning_rate": 2.3720930232558144e-06, "loss": 0.5134, "step": 153 }, { "epoch": 0.07161680359634165, "grad_norm": 0.4794353246688843, "learning_rate": 2.3875968992248065e-06, "loss": 0.4641, "step": 154 }, { "epoch": 0.07208184777553868, "grad_norm": 0.4282940924167633, "learning_rate": 2.403100775193799e-06, "loss": 0.4693, "step": 155 }, { "epoch": 0.0725468919547357, "grad_norm": 0.4426284730434418, "learning_rate": 2.418604651162791e-06, "loss": 0.4785, "step": 156 }, { "epoch": 0.07301193613393273, "grad_norm": 0.45532897114753723, "learning_rate": 2.4341085271317833e-06, "loss": 0.4934, "step": 157 }, { "epoch": 0.07347698031312974, "grad_norm": 0.5064568519592285, "learning_rate": 2.4496124031007753e-06, "loss": 0.495, "step": 158 }, { "epoch": 0.07394202449232677, "grad_norm": 0.4576449692249298, "learning_rate": 2.4651162790697678e-06, "loss": 0.4551, "step": 159 }, { "epoch": 0.0744070686715238, "grad_norm": 0.4424917697906494, "learning_rate": 2.4806201550387598e-06, "loss": 0.4861, "step": 160 }, { "epoch": 0.07487211285072082, "grad_norm": 0.48563066124916077, "learning_rate": 2.496124031007752e-06, "loss": 0.4614, "step": 161 }, { "epoch": 0.07533715702991785, "grad_norm": 0.43007153272628784, "learning_rate": 2.5116279069767446e-06, "loss": 0.4822, "step": 162 }, { "epoch": 0.07580220120911486, "grad_norm": 0.41486161947250366, "learning_rate": 2.5271317829457366e-06, "loss": 0.4713, "step": 163 }, { "epoch": 0.0762672453883119, "grad_norm": 0.39590543508529663, "learning_rate": 2.5426356589147286e-06, "loss": 0.5051, "step": 164 }, { "epoch": 0.07673228956750891, "grad_norm": 0.4512157738208771, "learning_rate": 2.558139534883721e-06, "loss": 0.4597, "step": 165 }, { "epoch": 0.07719733374670594, "grad_norm": 0.5571392178535461, "learning_rate": 2.5736434108527135e-06, "loss": 0.4783, "step": 166 }, { "epoch": 0.07766237792590296, "grad_norm": 0.41649627685546875, "learning_rate": 2.5891472868217055e-06, "loss": 0.4648, "step": 167 }, { "epoch": 0.07812742210509999, "grad_norm": 0.45857080817222595, "learning_rate": 2.604651162790698e-06, "loss": 0.4793, "step": 168 }, { "epoch": 0.078592466284297, "grad_norm": 0.42742466926574707, "learning_rate": 2.6201550387596904e-06, "loss": 0.4894, "step": 169 }, { "epoch": 0.07905751046349403, "grad_norm": 0.48390820622444153, "learning_rate": 2.635658914728683e-06, "loss": 0.4707, "step": 170 }, { "epoch": 0.07952255464269106, "grad_norm": 0.5031663775444031, "learning_rate": 2.6511627906976744e-06, "loss": 0.4591, "step": 171 }, { "epoch": 0.07998759882188808, "grad_norm": 0.4769449830055237, "learning_rate": 2.666666666666667e-06, "loss": 0.4735, "step": 172 }, { "epoch": 0.08045264300108511, "grad_norm": 0.4525051712989807, "learning_rate": 2.6821705426356593e-06, "loss": 0.4655, "step": 173 }, { "epoch": 0.08091768718028212, "grad_norm": 0.4477654695510864, "learning_rate": 2.6976744186046517e-06, "loss": 0.461, "step": 174 }, { "epoch": 0.08138273135947915, "grad_norm": 0.45438140630722046, "learning_rate": 2.7131782945736433e-06, "loss": 0.4807, "step": 175 }, { "epoch": 0.08184777553867617, "grad_norm": 0.47995445132255554, "learning_rate": 2.7286821705426357e-06, "loss": 0.4768, "step": 176 }, { "epoch": 0.0823128197178732, "grad_norm": 0.40142083168029785, "learning_rate": 2.744186046511628e-06, "loss": 0.4814, "step": 177 }, { "epoch": 0.08277786389707022, "grad_norm": 0.4523696005344391, "learning_rate": 2.7596899224806206e-06, "loss": 0.4561, "step": 178 }, { "epoch": 0.08324290807626725, "grad_norm": 0.4552527368068695, "learning_rate": 2.7751937984496126e-06, "loss": 0.4796, "step": 179 }, { "epoch": 0.08370795225546426, "grad_norm": 0.4601724445819855, "learning_rate": 2.790697674418605e-06, "loss": 0.4733, "step": 180 }, { "epoch": 0.08417299643466129, "grad_norm": 0.4687198996543884, "learning_rate": 2.8062015503875974e-06, "loss": 0.4648, "step": 181 }, { "epoch": 0.08463804061385832, "grad_norm": 0.4488019347190857, "learning_rate": 2.8217054263565894e-06, "loss": 0.4794, "step": 182 }, { "epoch": 0.08510308479305534, "grad_norm": 0.462455153465271, "learning_rate": 2.8372093023255815e-06, "loss": 0.4699, "step": 183 }, { "epoch": 0.08556812897225237, "grad_norm": 0.43786558508872986, "learning_rate": 2.852713178294574e-06, "loss": 0.4807, "step": 184 }, { "epoch": 0.08603317315144939, "grad_norm": 0.4396842122077942, "learning_rate": 2.8682170542635663e-06, "loss": 0.4744, "step": 185 }, { "epoch": 0.08649821733064642, "grad_norm": 0.40669193863868713, "learning_rate": 2.8837209302325587e-06, "loss": 0.5, "step": 186 }, { "epoch": 0.08696326150984343, "grad_norm": 0.43069615960121155, "learning_rate": 2.8992248062015503e-06, "loss": 0.4666, "step": 187 }, { "epoch": 0.08742830568904046, "grad_norm": 0.3929974138736725, "learning_rate": 2.9147286821705428e-06, "loss": 0.4599, "step": 188 }, { "epoch": 0.08789334986823748, "grad_norm": 0.44300031661987305, "learning_rate": 2.930232558139535e-06, "loss": 0.4441, "step": 189 }, { "epoch": 0.08835839404743451, "grad_norm": 0.40551915764808655, "learning_rate": 2.9457364341085276e-06, "loss": 0.4536, "step": 190 }, { "epoch": 0.08882343822663152, "grad_norm": 0.45988717675209045, "learning_rate": 2.9612403100775196e-06, "loss": 0.4442, "step": 191 }, { "epoch": 0.08928848240582855, "grad_norm": 0.41964244842529297, "learning_rate": 2.9767441860465116e-06, "loss": 0.4294, "step": 192 }, { "epoch": 0.08975352658502558, "grad_norm": 0.4278603196144104, "learning_rate": 2.992248062015504e-06, "loss": 0.4512, "step": 193 }, { "epoch": 0.0902185707642226, "grad_norm": 0.4759548008441925, "learning_rate": 3.0077519379844965e-06, "loss": 0.4856, "step": 194 }, { "epoch": 0.09068361494341963, "grad_norm": 0.44720789790153503, "learning_rate": 3.0232558139534885e-06, "loss": 0.439, "step": 195 }, { "epoch": 0.09114865912261665, "grad_norm": 0.4375242590904236, "learning_rate": 3.038759689922481e-06, "loss": 0.4559, "step": 196 }, { "epoch": 0.09161370330181368, "grad_norm": 0.4382531940937042, "learning_rate": 3.0542635658914734e-06, "loss": 0.4651, "step": 197 }, { "epoch": 0.09207874748101069, "grad_norm": 0.46348533034324646, "learning_rate": 3.069767441860466e-06, "loss": 0.4594, "step": 198 }, { "epoch": 0.09254379166020772, "grad_norm": 0.5095227360725403, "learning_rate": 3.0852713178294574e-06, "loss": 0.4621, "step": 199 }, { "epoch": 0.09300883583940474, "grad_norm": 0.5287566781044006, "learning_rate": 3.10077519379845e-06, "loss": 0.4805, "step": 200 }, { "epoch": 0.09347388001860177, "grad_norm": 0.49240946769714355, "learning_rate": 3.1162790697674423e-06, "loss": 0.4836, "step": 201 }, { "epoch": 0.09393892419779878, "grad_norm": 0.4462595283985138, "learning_rate": 3.1317829457364347e-06, "loss": 0.4466, "step": 202 }, { "epoch": 0.09440396837699581, "grad_norm": 0.5251316428184509, "learning_rate": 3.1472868217054263e-06, "loss": 0.4754, "step": 203 }, { "epoch": 0.09486901255619284, "grad_norm": 0.516490638256073, "learning_rate": 3.1627906976744187e-06, "loss": 0.4768, "step": 204 }, { "epoch": 0.09533405673538986, "grad_norm": 0.455197274684906, "learning_rate": 3.178294573643411e-06, "loss": 0.449, "step": 205 }, { "epoch": 0.09579910091458689, "grad_norm": 0.4805566966533661, "learning_rate": 3.1937984496124036e-06, "loss": 0.4775, "step": 206 }, { "epoch": 0.0962641450937839, "grad_norm": 0.49571681022644043, "learning_rate": 3.2093023255813956e-06, "loss": 0.4297, "step": 207 }, { "epoch": 0.09672918927298094, "grad_norm": 0.43034252524375916, "learning_rate": 3.224806201550388e-06, "loss": 0.4941, "step": 208 }, { "epoch": 0.09719423345217795, "grad_norm": 0.539557933807373, "learning_rate": 3.24031007751938e-06, "loss": 0.4412, "step": 209 }, { "epoch": 0.09765927763137498, "grad_norm": 0.5281848907470703, "learning_rate": 3.2558139534883724e-06, "loss": 0.468, "step": 210 }, { "epoch": 0.098124321810572, "grad_norm": 0.4832194745540619, "learning_rate": 3.2713178294573644e-06, "loss": 0.4691, "step": 211 }, { "epoch": 0.09858936598976903, "grad_norm": 0.4533843696117401, "learning_rate": 3.286821705426357e-06, "loss": 0.4746, "step": 212 }, { "epoch": 0.09905441016896605, "grad_norm": 0.509421169757843, "learning_rate": 3.3023255813953493e-06, "loss": 0.4646, "step": 213 }, { "epoch": 0.09951945434816308, "grad_norm": 0.4463287889957428, "learning_rate": 3.3178294573643417e-06, "loss": 0.4763, "step": 214 }, { "epoch": 0.0999844985273601, "grad_norm": 0.443873792886734, "learning_rate": 3.3333333333333333e-06, "loss": 0.443, "step": 215 }, { "epoch": 0.10044954270655712, "grad_norm": 0.5233954191207886, "learning_rate": 3.3488372093023258e-06, "loss": 0.4574, "step": 216 }, { "epoch": 0.10091458688575415, "grad_norm": 0.4142616093158722, "learning_rate": 3.364341085271318e-06, "loss": 0.4304, "step": 217 }, { "epoch": 0.10137963106495117, "grad_norm": 0.4118039309978485, "learning_rate": 3.3798449612403106e-06, "loss": 0.4272, "step": 218 }, { "epoch": 0.1018446752441482, "grad_norm": 0.4856218099594116, "learning_rate": 3.3953488372093026e-06, "loss": 0.4834, "step": 219 }, { "epoch": 0.10230971942334521, "grad_norm": 0.4576835036277771, "learning_rate": 3.4108527131782946e-06, "loss": 0.4216, "step": 220 }, { "epoch": 0.10277476360254224, "grad_norm": 0.5164148211479187, "learning_rate": 3.426356589147287e-06, "loss": 0.4545, "step": 221 }, { "epoch": 0.10323980778173926, "grad_norm": 0.4515243470668793, "learning_rate": 3.4418604651162795e-06, "loss": 0.4515, "step": 222 }, { "epoch": 0.10370485196093629, "grad_norm": 0.4391280710697174, "learning_rate": 3.4573643410852715e-06, "loss": 0.4466, "step": 223 }, { "epoch": 0.10416989614013332, "grad_norm": 0.41174498200416565, "learning_rate": 3.472868217054264e-06, "loss": 0.4424, "step": 224 }, { "epoch": 0.10463494031933034, "grad_norm": 0.45546606183052063, "learning_rate": 3.4883720930232564e-06, "loss": 0.481, "step": 225 }, { "epoch": 0.10509998449852737, "grad_norm": 0.4433421194553375, "learning_rate": 3.5038759689922484e-06, "loss": 0.4586, "step": 226 }, { "epoch": 0.10556502867772438, "grad_norm": 0.46707531809806824, "learning_rate": 3.5193798449612404e-06, "loss": 0.4627, "step": 227 }, { "epoch": 0.10603007285692141, "grad_norm": 0.48355457186698914, "learning_rate": 3.534883720930233e-06, "loss": 0.4834, "step": 228 }, { "epoch": 0.10649511703611843, "grad_norm": 0.5134936571121216, "learning_rate": 3.5503875968992252e-06, "loss": 0.4496, "step": 229 }, { "epoch": 0.10696016121531546, "grad_norm": 0.4306149184703827, "learning_rate": 3.5658914728682177e-06, "loss": 0.4689, "step": 230 }, { "epoch": 0.10742520539451247, "grad_norm": 0.4603032171726227, "learning_rate": 3.5813953488372093e-06, "loss": 0.4489, "step": 231 }, { "epoch": 0.1078902495737095, "grad_norm": 0.46613025665283203, "learning_rate": 3.5968992248062017e-06, "loss": 0.4648, "step": 232 }, { "epoch": 0.10835529375290652, "grad_norm": 0.497821182012558, "learning_rate": 3.612403100775194e-06, "loss": 0.4454, "step": 233 }, { "epoch": 0.10882033793210355, "grad_norm": 0.44097408652305603, "learning_rate": 3.6279069767441866e-06, "loss": 0.4443, "step": 234 }, { "epoch": 0.10928538211130058, "grad_norm": 0.4849492013454437, "learning_rate": 3.6434108527131786e-06, "loss": 0.4546, "step": 235 }, { "epoch": 0.1097504262904976, "grad_norm": 0.4576839506626129, "learning_rate": 3.658914728682171e-06, "loss": 0.44, "step": 236 }, { "epoch": 0.11021547046969463, "grad_norm": 0.44836199283599854, "learning_rate": 3.674418604651163e-06, "loss": 0.4779, "step": 237 }, { "epoch": 0.11068051464889164, "grad_norm": 0.46997904777526855, "learning_rate": 3.6899224806201554e-06, "loss": 0.4755, "step": 238 }, { "epoch": 0.11114555882808867, "grad_norm": 0.4699132740497589, "learning_rate": 3.7054263565891474e-06, "loss": 0.4279, "step": 239 }, { "epoch": 0.11161060300728569, "grad_norm": 0.4158969819545746, "learning_rate": 3.72093023255814e-06, "loss": 0.4462, "step": 240 }, { "epoch": 0.11207564718648272, "grad_norm": 0.4518444240093231, "learning_rate": 3.7364341085271323e-06, "loss": 0.4419, "step": 241 }, { "epoch": 0.11254069136567973, "grad_norm": 0.43959298729896545, "learning_rate": 3.7519379844961247e-06, "loss": 0.4277, "step": 242 }, { "epoch": 0.11300573554487676, "grad_norm": 0.46462079882621765, "learning_rate": 3.7674418604651163e-06, "loss": 0.4666, "step": 243 }, { "epoch": 0.11347077972407378, "grad_norm": 0.44643181562423706, "learning_rate": 3.7829457364341087e-06, "loss": 0.4542, "step": 244 }, { "epoch": 0.11393582390327081, "grad_norm": 0.4474817216396332, "learning_rate": 3.798449612403101e-06, "loss": 0.4457, "step": 245 }, { "epoch": 0.11440086808246784, "grad_norm": 0.440893292427063, "learning_rate": 3.8139534883720936e-06, "loss": 0.4567, "step": 246 }, { "epoch": 0.11486591226166486, "grad_norm": 0.5157288908958435, "learning_rate": 3.829457364341085e-06, "loss": 0.4668, "step": 247 }, { "epoch": 0.11533095644086189, "grad_norm": 0.41228559613227844, "learning_rate": 3.844961240310078e-06, "loss": 0.4532, "step": 248 }, { "epoch": 0.1157960006200589, "grad_norm": 0.49492067098617554, "learning_rate": 3.86046511627907e-06, "loss": 0.4717, "step": 249 }, { "epoch": 0.11626104479925593, "grad_norm": 0.3908706605434418, "learning_rate": 3.875968992248063e-06, "loss": 0.4363, "step": 250 }, { "epoch": 0.11672608897845295, "grad_norm": 0.4512355625629425, "learning_rate": 3.891472868217054e-06, "loss": 0.461, "step": 251 }, { "epoch": 0.11719113315764998, "grad_norm": 0.45557087659835815, "learning_rate": 3.906976744186047e-06, "loss": 0.4479, "step": 252 }, { "epoch": 0.117656177336847, "grad_norm": 0.42376312613487244, "learning_rate": 3.922480620155039e-06, "loss": 0.4614, "step": 253 }, { "epoch": 0.11812122151604403, "grad_norm": 0.494684636592865, "learning_rate": 3.937984496124032e-06, "loss": 0.4528, "step": 254 }, { "epoch": 0.11858626569524104, "grad_norm": 0.46942609548568726, "learning_rate": 3.953488372093024e-06, "loss": 0.4363, "step": 255 }, { "epoch": 0.11905130987443807, "grad_norm": 0.4552278518676758, "learning_rate": 3.968992248062016e-06, "loss": 0.4462, "step": 256 }, { "epoch": 0.1195163540536351, "grad_norm": 0.4802659749984741, "learning_rate": 3.984496124031008e-06, "loss": 0.4694, "step": 257 }, { "epoch": 0.11998139823283212, "grad_norm": 0.49204587936401367, "learning_rate": 4.000000000000001e-06, "loss": 0.449, "step": 258 }, { "epoch": 0.12044644241202915, "grad_norm": 0.4670722484588623, "learning_rate": 4.015503875968993e-06, "loss": 0.4624, "step": 259 }, { "epoch": 0.12091148659122616, "grad_norm": 0.4534282386302948, "learning_rate": 4.031007751937985e-06, "loss": 0.4338, "step": 260 }, { "epoch": 0.1213765307704232, "grad_norm": 0.4467426538467407, "learning_rate": 4.0465116279069775e-06, "loss": 0.4513, "step": 261 }, { "epoch": 0.12184157494962021, "grad_norm": 0.4626162648200989, "learning_rate": 4.0620155038759695e-06, "loss": 0.4714, "step": 262 }, { "epoch": 0.12230661912881724, "grad_norm": 0.45661380887031555, "learning_rate": 4.0775193798449616e-06, "loss": 0.4559, "step": 263 }, { "epoch": 0.12277166330801426, "grad_norm": 0.5042157769203186, "learning_rate": 4.0930232558139536e-06, "loss": 0.4717, "step": 264 }, { "epoch": 0.12323670748721129, "grad_norm": 0.5059670805931091, "learning_rate": 4.108527131782946e-06, "loss": 0.4517, "step": 265 }, { "epoch": 0.1237017516664083, "grad_norm": 0.4609099328517914, "learning_rate": 4.124031007751938e-06, "loss": 0.4458, "step": 266 }, { "epoch": 0.12416679584560533, "grad_norm": 0.5168102979660034, "learning_rate": 4.1395348837209304e-06, "loss": 0.4068, "step": 267 }, { "epoch": 0.12463184002480236, "grad_norm": 0.4813331961631775, "learning_rate": 4.1550387596899224e-06, "loss": 0.4364, "step": 268 }, { "epoch": 0.1250968842039994, "grad_norm": 0.4824882745742798, "learning_rate": 4.170542635658915e-06, "loss": 0.4726, "step": 269 }, { "epoch": 0.1255619283831964, "grad_norm": 0.4692811965942383, "learning_rate": 4.186046511627907e-06, "loss": 0.462, "step": 270 }, { "epoch": 0.12602697256239342, "grad_norm": 0.473664253950119, "learning_rate": 4.201550387596899e-06, "loss": 0.4681, "step": 271 }, { "epoch": 0.12649201674159044, "grad_norm": 0.42069149017333984, "learning_rate": 4.217054263565892e-06, "loss": 0.4428, "step": 272 }, { "epoch": 0.12695706092078748, "grad_norm": 0.4619064927101135, "learning_rate": 4.232558139534884e-06, "loss": 0.4677, "step": 273 }, { "epoch": 0.1274221050999845, "grad_norm": 0.5394362807273865, "learning_rate": 4.248062015503876e-06, "loss": 0.4262, "step": 274 }, { "epoch": 0.12788714927918152, "grad_norm": 0.44251418113708496, "learning_rate": 4.263565891472868e-06, "loss": 0.4826, "step": 275 }, { "epoch": 0.12835219345837853, "grad_norm": 0.40653759241104126, "learning_rate": 4.279069767441861e-06, "loss": 0.4229, "step": 276 }, { "epoch": 0.12881723763757558, "grad_norm": 0.5286734104156494, "learning_rate": 4.294573643410853e-06, "loss": 0.4283, "step": 277 }, { "epoch": 0.1292822818167726, "grad_norm": 0.47520795464515686, "learning_rate": 4.310077519379846e-06, "loss": 0.4462, "step": 278 }, { "epoch": 0.1297473259959696, "grad_norm": 0.40976548194885254, "learning_rate": 4.325581395348837e-06, "loss": 0.4408, "step": 279 }, { "epoch": 0.13021237017516665, "grad_norm": 0.46813562512397766, "learning_rate": 4.34108527131783e-06, "loss": 0.4555, "step": 280 }, { "epoch": 0.13067741435436367, "grad_norm": 0.4154888689517975, "learning_rate": 4.356589147286822e-06, "loss": 0.4492, "step": 281 }, { "epoch": 0.13114245853356069, "grad_norm": 0.5551090240478516, "learning_rate": 4.372093023255815e-06, "loss": 0.4811, "step": 282 }, { "epoch": 0.1316075027127577, "grad_norm": 0.47705549001693726, "learning_rate": 4.387596899224806e-06, "loss": 0.436, "step": 283 }, { "epoch": 0.13207254689195475, "grad_norm": 0.4338889718055725, "learning_rate": 4.403100775193799e-06, "loss": 0.4636, "step": 284 }, { "epoch": 0.13253759107115176, "grad_norm": 0.49717769026756287, "learning_rate": 4.418604651162791e-06, "loss": 0.4257, "step": 285 }, { "epoch": 0.13300263525034878, "grad_norm": 0.4574042558670044, "learning_rate": 4.434108527131784e-06, "loss": 0.4135, "step": 286 }, { "epoch": 0.1334676794295458, "grad_norm": 0.4649648368358612, "learning_rate": 4.449612403100776e-06, "loss": 0.4557, "step": 287 }, { "epoch": 0.13393272360874284, "grad_norm": 0.43360039591789246, "learning_rate": 4.465116279069768e-06, "loss": 0.423, "step": 288 }, { "epoch": 0.13439776778793985, "grad_norm": 0.43359556794166565, "learning_rate": 4.4806201550387605e-06, "loss": 0.4346, "step": 289 }, { "epoch": 0.13486281196713687, "grad_norm": 0.4726753532886505, "learning_rate": 4.4961240310077525e-06, "loss": 0.4376, "step": 290 }, { "epoch": 0.1353278561463339, "grad_norm": 0.43972247838974, "learning_rate": 4.5116279069767445e-06, "loss": 0.4422, "step": 291 }, { "epoch": 0.13579290032553093, "grad_norm": 0.4216838479042053, "learning_rate": 4.5271317829457366e-06, "loss": 0.4275, "step": 292 }, { "epoch": 0.13625794450472795, "grad_norm": 0.4523601830005646, "learning_rate": 4.542635658914729e-06, "loss": 0.4524, "step": 293 }, { "epoch": 0.13672298868392496, "grad_norm": 0.4428309500217438, "learning_rate": 4.558139534883721e-06, "loss": 0.4247, "step": 294 }, { "epoch": 0.137188032863122, "grad_norm": 0.42991116642951965, "learning_rate": 4.573643410852713e-06, "loss": 0.4232, "step": 295 }, { "epoch": 0.13765307704231902, "grad_norm": 0.40712928771972656, "learning_rate": 4.5891472868217054e-06, "loss": 0.4426, "step": 296 }, { "epoch": 0.13811812122151604, "grad_norm": 0.45721739530563354, "learning_rate": 4.604651162790698e-06, "loss": 0.4666, "step": 297 }, { "epoch": 0.13858316540071305, "grad_norm": 0.41279730200767517, "learning_rate": 4.62015503875969e-06, "loss": 0.4258, "step": 298 }, { "epoch": 0.1390482095799101, "grad_norm": 0.47948822379112244, "learning_rate": 4.635658914728682e-06, "loss": 0.4285, "step": 299 }, { "epoch": 0.13951325375910711, "grad_norm": 0.45233485102653503, "learning_rate": 4.651162790697675e-06, "loss": 0.4522, "step": 300 }, { "epoch": 0.13997829793830413, "grad_norm": 0.4481063783168793, "learning_rate": 4.666666666666667e-06, "loss": 0.4332, "step": 301 }, { "epoch": 0.14044334211750117, "grad_norm": 0.4825620651245117, "learning_rate": 4.682170542635659e-06, "loss": 0.4643, "step": 302 }, { "epoch": 0.1409083862966982, "grad_norm": 0.4766821265220642, "learning_rate": 4.697674418604651e-06, "loss": 0.4433, "step": 303 }, { "epoch": 0.1413734304758952, "grad_norm": 0.46264582872390747, "learning_rate": 4.713178294573644e-06, "loss": 0.4409, "step": 304 }, { "epoch": 0.14183847465509222, "grad_norm": 0.4554003179073334, "learning_rate": 4.728682170542636e-06, "loss": 0.4215, "step": 305 }, { "epoch": 0.14230351883428927, "grad_norm": 0.45970794558525085, "learning_rate": 4.744186046511629e-06, "loss": 0.4259, "step": 306 }, { "epoch": 0.14276856301348628, "grad_norm": 0.4238308370113373, "learning_rate": 4.75968992248062e-06, "loss": 0.4723, "step": 307 }, { "epoch": 0.1432336071926833, "grad_norm": 0.4310249090194702, "learning_rate": 4.775193798449613e-06, "loss": 0.4526, "step": 308 }, { "epoch": 0.14369865137188031, "grad_norm": 0.4667305648326874, "learning_rate": 4.790697674418605e-06, "loss": 0.4631, "step": 309 }, { "epoch": 0.14416369555107736, "grad_norm": 0.465160071849823, "learning_rate": 4.806201550387598e-06, "loss": 0.4399, "step": 310 }, { "epoch": 0.14462873973027437, "grad_norm": 0.44416457414627075, "learning_rate": 4.821705426356589e-06, "loss": 0.4398, "step": 311 }, { "epoch": 0.1450937839094714, "grad_norm": 0.44952088594436646, "learning_rate": 4.837209302325582e-06, "loss": 0.4311, "step": 312 }, { "epoch": 0.14555882808866843, "grad_norm": 0.44717299938201904, "learning_rate": 4.852713178294574e-06, "loss": 0.4541, "step": 313 }, { "epoch": 0.14602387226786545, "grad_norm": 0.4240880310535431, "learning_rate": 4.868217054263567e-06, "loss": 0.4496, "step": 314 }, { "epoch": 0.14648891644706247, "grad_norm": 0.46114540100097656, "learning_rate": 4.883720930232559e-06, "loss": 0.4493, "step": 315 }, { "epoch": 0.14695396062625948, "grad_norm": 0.4437052309513092, "learning_rate": 4.899224806201551e-06, "loss": 0.435, "step": 316 }, { "epoch": 0.14741900480545653, "grad_norm": 0.458609938621521, "learning_rate": 4.9147286821705435e-06, "loss": 0.4451, "step": 317 }, { "epoch": 0.14788404898465354, "grad_norm": 0.4837147295475006, "learning_rate": 4.9302325581395355e-06, "loss": 0.4383, "step": 318 }, { "epoch": 0.14834909316385056, "grad_norm": 0.4129205048084259, "learning_rate": 4.9457364341085275e-06, "loss": 0.4556, "step": 319 }, { "epoch": 0.1488141373430476, "grad_norm": 0.5567089319229126, "learning_rate": 4.9612403100775195e-06, "loss": 0.4308, "step": 320 }, { "epoch": 0.14927918152224462, "grad_norm": 0.47763490676879883, "learning_rate": 4.976744186046512e-06, "loss": 0.4492, "step": 321 }, { "epoch": 0.14974422570144164, "grad_norm": 0.4176934063434601, "learning_rate": 4.992248062015504e-06, "loss": 0.4239, "step": 322 }, { "epoch": 0.15020926988063865, "grad_norm": 0.5204504728317261, "learning_rate": 5.007751937984496e-06, "loss": 0.4357, "step": 323 }, { "epoch": 0.1506743140598357, "grad_norm": 0.4915800988674164, "learning_rate": 5.023255813953489e-06, "loss": 0.4803, "step": 324 }, { "epoch": 0.1511393582390327, "grad_norm": 0.4427052438259125, "learning_rate": 5.038759689922481e-06, "loss": 0.4279, "step": 325 }, { "epoch": 0.15160440241822973, "grad_norm": 0.5173365473747253, "learning_rate": 5.054263565891473e-06, "loss": 0.4304, "step": 326 }, { "epoch": 0.15206944659742674, "grad_norm": 0.4465036988258362, "learning_rate": 5.069767441860466e-06, "loss": 0.4288, "step": 327 }, { "epoch": 0.1525344907766238, "grad_norm": 0.4534618556499481, "learning_rate": 5.085271317829457e-06, "loss": 0.4308, "step": 328 }, { "epoch": 0.1529995349558208, "grad_norm": 0.42236584424972534, "learning_rate": 5.100775193798449e-06, "loss": 0.4221, "step": 329 }, { "epoch": 0.15346457913501782, "grad_norm": 0.44343680143356323, "learning_rate": 5.116279069767442e-06, "loss": 0.4384, "step": 330 }, { "epoch": 0.15392962331421486, "grad_norm": 0.45353367924690247, "learning_rate": 5.131782945736434e-06, "loss": 0.4425, "step": 331 }, { "epoch": 0.15439466749341188, "grad_norm": 0.4323454797267914, "learning_rate": 5.147286821705427e-06, "loss": 0.4398, "step": 332 }, { "epoch": 0.1548597116726089, "grad_norm": 0.4849368929862976, "learning_rate": 5.162790697674419e-06, "loss": 0.4586, "step": 333 }, { "epoch": 0.1553247558518059, "grad_norm": 0.430549681186676, "learning_rate": 5.178294573643411e-06, "loss": 0.4308, "step": 334 }, { "epoch": 0.15578980003100296, "grad_norm": 0.4729780852794647, "learning_rate": 5.193798449612404e-06, "loss": 0.4485, "step": 335 }, { "epoch": 0.15625484421019997, "grad_norm": 0.45753175020217896, "learning_rate": 5.209302325581396e-06, "loss": 0.4205, "step": 336 }, { "epoch": 0.156719888389397, "grad_norm": 0.44031214714050293, "learning_rate": 5.224806201550388e-06, "loss": 0.4309, "step": 337 }, { "epoch": 0.157184932568594, "grad_norm": 0.5231649279594421, "learning_rate": 5.240310077519381e-06, "loss": 0.429, "step": 338 }, { "epoch": 0.15764997674779105, "grad_norm": 0.48316752910614014, "learning_rate": 5.255813953488372e-06, "loss": 0.4651, "step": 339 }, { "epoch": 0.15811502092698806, "grad_norm": 0.4905528426170349, "learning_rate": 5.271317829457366e-06, "loss": 0.4233, "step": 340 }, { "epoch": 0.15858006510618508, "grad_norm": 0.44905000925064087, "learning_rate": 5.286821705426357e-06, "loss": 0.4741, "step": 341 }, { "epoch": 0.15904510928538212, "grad_norm": 0.4897783696651459, "learning_rate": 5.302325581395349e-06, "loss": 0.4568, "step": 342 }, { "epoch": 0.15951015346457914, "grad_norm": 0.4445964992046356, "learning_rate": 5.317829457364342e-06, "loss": 0.4294, "step": 343 }, { "epoch": 0.15997519764377616, "grad_norm": 0.48060405254364014, "learning_rate": 5.333333333333334e-06, "loss": 0.4612, "step": 344 }, { "epoch": 0.16044024182297317, "grad_norm": 0.4548921585083008, "learning_rate": 5.348837209302326e-06, "loss": 0.4569, "step": 345 }, { "epoch": 0.16090528600217022, "grad_norm": 0.47072821855545044, "learning_rate": 5.3643410852713185e-06, "loss": 0.4403, "step": 346 }, { "epoch": 0.16137033018136723, "grad_norm": 0.40528589487075806, "learning_rate": 5.3798449612403105e-06, "loss": 0.4524, "step": 347 }, { "epoch": 0.16183537436056425, "grad_norm": 0.4871981739997864, "learning_rate": 5.395348837209303e-06, "loss": 0.4402, "step": 348 }, { "epoch": 0.16230041853976127, "grad_norm": 0.4808438718318939, "learning_rate": 5.410852713178295e-06, "loss": 0.475, "step": 349 }, { "epoch": 0.1627654627189583, "grad_norm": 0.4704870283603668, "learning_rate": 5.4263565891472865e-06, "loss": 0.432, "step": 350 }, { "epoch": 0.16323050689815533, "grad_norm": 0.5283604860305786, "learning_rate": 5.44186046511628e-06, "loss": 0.4388, "step": 351 }, { "epoch": 0.16369555107735234, "grad_norm": 0.4891282618045807, "learning_rate": 5.457364341085271e-06, "loss": 0.4561, "step": 352 }, { "epoch": 0.16416059525654939, "grad_norm": 0.5148476362228394, "learning_rate": 5.472868217054263e-06, "loss": 0.4419, "step": 353 }, { "epoch": 0.1646256394357464, "grad_norm": 0.43338412046432495, "learning_rate": 5.488372093023256e-06, "loss": 0.4295, "step": 354 }, { "epoch": 0.16509068361494342, "grad_norm": 0.5159112811088562, "learning_rate": 5.503875968992248e-06, "loss": 0.4322, "step": 355 }, { "epoch": 0.16555572779414043, "grad_norm": 0.4641515910625458, "learning_rate": 5.519379844961241e-06, "loss": 0.4303, "step": 356 }, { "epoch": 0.16602077197333748, "grad_norm": 0.47173258662223816, "learning_rate": 5.534883720930233e-06, "loss": 0.4437, "step": 357 }, { "epoch": 0.1664858161525345, "grad_norm": 0.5051222443580627, "learning_rate": 5.550387596899225e-06, "loss": 0.4265, "step": 358 }, { "epoch": 0.1669508603317315, "grad_norm": 0.4815625250339508, "learning_rate": 5.565891472868218e-06, "loss": 0.4446, "step": 359 }, { "epoch": 0.16741590451092853, "grad_norm": 0.5002831816673279, "learning_rate": 5.58139534883721e-06, "loss": 0.4428, "step": 360 }, { "epoch": 0.16788094869012557, "grad_norm": 0.47330793738365173, "learning_rate": 5.596899224806201e-06, "loss": 0.4442, "step": 361 }, { "epoch": 0.16834599286932259, "grad_norm": 0.4468974173069, "learning_rate": 5.612403100775195e-06, "loss": 0.4482, "step": 362 }, { "epoch": 0.1688110370485196, "grad_norm": 0.46247512102127075, "learning_rate": 5.627906976744186e-06, "loss": 0.4534, "step": 363 }, { "epoch": 0.16927608122771665, "grad_norm": 0.4856452941894531, "learning_rate": 5.643410852713179e-06, "loss": 0.4304, "step": 364 }, { "epoch": 0.16974112540691366, "grad_norm": 0.48045817017555237, "learning_rate": 5.658914728682171e-06, "loss": 0.4347, "step": 365 }, { "epoch": 0.17020616958611068, "grad_norm": 0.4817154109477997, "learning_rate": 5.674418604651163e-06, "loss": 0.455, "step": 366 }, { "epoch": 0.1706712137653077, "grad_norm": 0.4779435396194458, "learning_rate": 5.689922480620156e-06, "loss": 0.398, "step": 367 }, { "epoch": 0.17113625794450474, "grad_norm": 0.5880154371261597, "learning_rate": 5.705426356589148e-06, "loss": 0.4537, "step": 368 }, { "epoch": 0.17160130212370175, "grad_norm": 0.53252112865448, "learning_rate": 5.72093023255814e-06, "loss": 0.446, "step": 369 }, { "epoch": 0.17206634630289877, "grad_norm": 0.4977021813392639, "learning_rate": 5.736434108527133e-06, "loss": 0.4353, "step": 370 }, { "epoch": 0.1725313904820958, "grad_norm": 0.5705452561378479, "learning_rate": 5.751937984496125e-06, "loss": 0.444, "step": 371 }, { "epoch": 0.17299643466129283, "grad_norm": 0.4966931641101837, "learning_rate": 5.7674418604651175e-06, "loss": 0.4264, "step": 372 }, { "epoch": 0.17346147884048985, "grad_norm": 0.5850646495819092, "learning_rate": 5.782945736434109e-06, "loss": 0.4223, "step": 373 }, { "epoch": 0.17392652301968686, "grad_norm": 0.469243586063385, "learning_rate": 5.798449612403101e-06, "loss": 0.4206, "step": 374 }, { "epoch": 0.1743915671988839, "grad_norm": 0.5241683125495911, "learning_rate": 5.8139534883720935e-06, "loss": 0.4422, "step": 375 }, { "epoch": 0.17485661137808092, "grad_norm": 0.5510844588279724, "learning_rate": 5.8294573643410855e-06, "loss": 0.4335, "step": 376 }, { "epoch": 0.17532165555727794, "grad_norm": 0.4422820508480072, "learning_rate": 5.8449612403100775e-06, "loss": 0.444, "step": 377 }, { "epoch": 0.17578669973647496, "grad_norm": 0.4676722586154938, "learning_rate": 5.86046511627907e-06, "loss": 0.4477, "step": 378 }, { "epoch": 0.176251743915672, "grad_norm": 0.41679903864860535, "learning_rate": 5.875968992248062e-06, "loss": 0.4085, "step": 379 }, { "epoch": 0.17671678809486902, "grad_norm": 0.4864595830440521, "learning_rate": 5.891472868217055e-06, "loss": 0.4067, "step": 380 }, { "epoch": 0.17718183227406603, "grad_norm": 0.47440460324287415, "learning_rate": 5.906976744186047e-06, "loss": 0.4496, "step": 381 }, { "epoch": 0.17764687645326305, "grad_norm": 0.49285659193992615, "learning_rate": 5.922480620155039e-06, "loss": 0.4476, "step": 382 }, { "epoch": 0.1781119206324601, "grad_norm": 0.4602559804916382, "learning_rate": 5.937984496124032e-06, "loss": 0.4111, "step": 383 }, { "epoch": 0.1785769648116571, "grad_norm": 0.4376397728919983, "learning_rate": 5.953488372093023e-06, "loss": 0.4232, "step": 384 }, { "epoch": 0.17904200899085412, "grad_norm": 0.4615304470062256, "learning_rate": 5.968992248062015e-06, "loss": 0.4413, "step": 385 }, { "epoch": 0.17950705317005117, "grad_norm": 0.49313583970069885, "learning_rate": 5.984496124031008e-06, "loss": 0.4426, "step": 386 }, { "epoch": 0.17997209734924818, "grad_norm": 0.4725257158279419, "learning_rate": 6e-06, "loss": 0.4515, "step": 387 }, { "epoch": 0.1804371415284452, "grad_norm": 0.44713687896728516, "learning_rate": 6.015503875968993e-06, "loss": 0.4232, "step": 388 }, { "epoch": 0.18090218570764222, "grad_norm": 0.444200724363327, "learning_rate": 6.031007751937985e-06, "loss": 0.4353, "step": 389 }, { "epoch": 0.18136722988683926, "grad_norm": 0.4371640980243683, "learning_rate": 6.046511627906977e-06, "loss": 0.4257, "step": 390 }, { "epoch": 0.18183227406603628, "grad_norm": 0.5189304947853088, "learning_rate": 6.06201550387597e-06, "loss": 0.4365, "step": 391 }, { "epoch": 0.1822973182452333, "grad_norm": 0.4122354984283447, "learning_rate": 6.077519379844962e-06, "loss": 0.4226, "step": 392 }, { "epoch": 0.1827623624244303, "grad_norm": 0.47240152955055237, "learning_rate": 6.093023255813954e-06, "loss": 0.4387, "step": 393 }, { "epoch": 0.18322740660362735, "grad_norm": 0.46310725808143616, "learning_rate": 6.108527131782947e-06, "loss": 0.4572, "step": 394 }, { "epoch": 0.18369245078282437, "grad_norm": 0.4549012780189514, "learning_rate": 6.124031007751938e-06, "loss": 0.4321, "step": 395 }, { "epoch": 0.18415749496202138, "grad_norm": 0.5056280493736267, "learning_rate": 6.139534883720932e-06, "loss": 0.4233, "step": 396 }, { "epoch": 0.18462253914121843, "grad_norm": 0.4270991086959839, "learning_rate": 6.155038759689923e-06, "loss": 0.4204, "step": 397 }, { "epoch": 0.18508758332041544, "grad_norm": 0.47168463468551636, "learning_rate": 6.170542635658915e-06, "loss": 0.3967, "step": 398 }, { "epoch": 0.18555262749961246, "grad_norm": 0.5067258477210999, "learning_rate": 6.186046511627908e-06, "loss": 0.4273, "step": 399 }, { "epoch": 0.18601767167880948, "grad_norm": 0.46395808458328247, "learning_rate": 6.2015503875969e-06, "loss": 0.4256, "step": 400 }, { "epoch": 0.18648271585800652, "grad_norm": 0.46003836393356323, "learning_rate": 6.217054263565892e-06, "loss": 0.41, "step": 401 }, { "epoch": 0.18694776003720354, "grad_norm": 0.5008445382118225, "learning_rate": 6.2325581395348845e-06, "loss": 0.3952, "step": 402 }, { "epoch": 0.18741280421640055, "grad_norm": 0.5617318153381348, "learning_rate": 6.2480620155038765e-06, "loss": 0.4104, "step": 403 }, { "epoch": 0.18787784839559757, "grad_norm": 0.49167153239250183, "learning_rate": 6.263565891472869e-06, "loss": 0.4252, "step": 404 }, { "epoch": 0.1883428925747946, "grad_norm": 0.4648562967777252, "learning_rate": 6.279069767441861e-06, "loss": 0.4485, "step": 405 }, { "epoch": 0.18880793675399163, "grad_norm": 0.48742571473121643, "learning_rate": 6.2945736434108525e-06, "loss": 0.4088, "step": 406 }, { "epoch": 0.18927298093318864, "grad_norm": 0.4988233745098114, "learning_rate": 6.310077519379845e-06, "loss": 0.4159, "step": 407 }, { "epoch": 0.1897380251123857, "grad_norm": 0.45389029383659363, "learning_rate": 6.325581395348837e-06, "loss": 0.45, "step": 408 }, { "epoch": 0.1902030692915827, "grad_norm": 0.48481884598731995, "learning_rate": 6.341085271317829e-06, "loss": 0.4327, "step": 409 }, { "epoch": 0.19066811347077972, "grad_norm": 0.5068842768669128, "learning_rate": 6.356589147286822e-06, "loss": 0.4068, "step": 410 }, { "epoch": 0.19113315764997674, "grad_norm": 0.5590021014213562, "learning_rate": 6.372093023255814e-06, "loss": 0.4622, "step": 411 }, { "epoch": 0.19159820182917378, "grad_norm": 0.4978815019130707, "learning_rate": 6.387596899224807e-06, "loss": 0.4285, "step": 412 }, { "epoch": 0.1920632460083708, "grad_norm": 0.48937058448791504, "learning_rate": 6.403100775193799e-06, "loss": 0.432, "step": 413 }, { "epoch": 0.1925282901875678, "grad_norm": 0.47953492403030396, "learning_rate": 6.418604651162791e-06, "loss": 0.428, "step": 414 }, { "epoch": 0.19299333436676483, "grad_norm": 0.5264999270439148, "learning_rate": 6.434108527131784e-06, "loss": 0.4338, "step": 415 }, { "epoch": 0.19345837854596187, "grad_norm": 0.5225833058357239, "learning_rate": 6.449612403100776e-06, "loss": 0.4423, "step": 416 }, { "epoch": 0.1939234227251589, "grad_norm": 0.43435758352279663, "learning_rate": 6.465116279069767e-06, "loss": 0.4034, "step": 417 }, { "epoch": 0.1943884669043559, "grad_norm": 0.4764443337917328, "learning_rate": 6.48062015503876e-06, "loss": 0.4623, "step": 418 }, { "epoch": 0.19485351108355295, "grad_norm": 0.49896514415740967, "learning_rate": 6.496124031007752e-06, "loss": 0.4413, "step": 419 }, { "epoch": 0.19531855526274997, "grad_norm": 0.46910813450813293, "learning_rate": 6.511627906976745e-06, "loss": 0.4089, "step": 420 }, { "epoch": 0.19578359944194698, "grad_norm": 0.46757951378822327, "learning_rate": 6.527131782945737e-06, "loss": 0.42, "step": 421 }, { "epoch": 0.196248643621144, "grad_norm": 0.4977341890335083, "learning_rate": 6.542635658914729e-06, "loss": 0.4167, "step": 422 }, { "epoch": 0.19671368780034104, "grad_norm": 0.4749394357204437, "learning_rate": 6.558139534883722e-06, "loss": 0.4044, "step": 423 }, { "epoch": 0.19717873197953806, "grad_norm": 0.461539626121521, "learning_rate": 6.573643410852714e-06, "loss": 0.4022, "step": 424 }, { "epoch": 0.19764377615873507, "grad_norm": 0.5062358379364014, "learning_rate": 6.589147286821706e-06, "loss": 0.435, "step": 425 }, { "epoch": 0.1981088203379321, "grad_norm": 0.42745712399482727, "learning_rate": 6.604651162790699e-06, "loss": 0.4393, "step": 426 }, { "epoch": 0.19857386451712913, "grad_norm": 0.43482205271720886, "learning_rate": 6.620155038759691e-06, "loss": 0.4132, "step": 427 }, { "epoch": 0.19903890869632615, "grad_norm": 0.452120840549469, "learning_rate": 6.6356589147286835e-06, "loss": 0.4207, "step": 428 }, { "epoch": 0.19950395287552317, "grad_norm": 0.453553169965744, "learning_rate": 6.651162790697675e-06, "loss": 0.4211, "step": 429 }, { "epoch": 0.1999689970547202, "grad_norm": 0.49294397234916687, "learning_rate": 6.666666666666667e-06, "loss": 0.4354, "step": 430 }, { "epoch": 0.20043404123391723, "grad_norm": 0.46980249881744385, "learning_rate": 6.6821705426356595e-06, "loss": 0.4315, "step": 431 }, { "epoch": 0.20089908541311424, "grad_norm": 0.48995155096054077, "learning_rate": 6.6976744186046515e-06, "loss": 0.4428, "step": 432 }, { "epoch": 0.20136412959231126, "grad_norm": 0.4816022515296936, "learning_rate": 6.7131782945736435e-06, "loss": 0.4411, "step": 433 }, { "epoch": 0.2018291737715083, "grad_norm": 0.49030253291130066, "learning_rate": 6.728682170542636e-06, "loss": 0.409, "step": 434 }, { "epoch": 0.20229421795070532, "grad_norm": 0.5009063482284546, "learning_rate": 6.744186046511628e-06, "loss": 0.4306, "step": 435 }, { "epoch": 0.20275926212990233, "grad_norm": 0.49570536613464355, "learning_rate": 6.759689922480621e-06, "loss": 0.4166, "step": 436 }, { "epoch": 0.20322430630909935, "grad_norm": 0.5443177223205566, "learning_rate": 6.775193798449613e-06, "loss": 0.4188, "step": 437 }, { "epoch": 0.2036893504882964, "grad_norm": 0.46004390716552734, "learning_rate": 6.790697674418605e-06, "loss": 0.4149, "step": 438 }, { "epoch": 0.2041543946674934, "grad_norm": 0.5454478859901428, "learning_rate": 6.806201550387598e-06, "loss": 0.4153, "step": 439 }, { "epoch": 0.20461943884669043, "grad_norm": 0.6096868515014648, "learning_rate": 6.821705426356589e-06, "loss": 0.4373, "step": 440 }, { "epoch": 0.20508448302588747, "grad_norm": 0.4331457018852234, "learning_rate": 6.837209302325581e-06, "loss": 0.4273, "step": 441 }, { "epoch": 0.2055495272050845, "grad_norm": 0.5101227760314941, "learning_rate": 6.852713178294574e-06, "loss": 0.4174, "step": 442 }, { "epoch": 0.2060145713842815, "grad_norm": 0.47638994455337524, "learning_rate": 6.868217054263566e-06, "loss": 0.4332, "step": 443 }, { "epoch": 0.20647961556347852, "grad_norm": 0.48688259720802307, "learning_rate": 6.883720930232559e-06, "loss": 0.4269, "step": 444 }, { "epoch": 0.20694465974267556, "grad_norm": 0.5044205188751221, "learning_rate": 6.899224806201551e-06, "loss": 0.4269, "step": 445 }, { "epoch": 0.20740970392187258, "grad_norm": 0.5487541556358337, "learning_rate": 6.914728682170543e-06, "loss": 0.4411, "step": 446 }, { "epoch": 0.2078747481010696, "grad_norm": 0.47763657569885254, "learning_rate": 6.930232558139536e-06, "loss": 0.4382, "step": 447 }, { "epoch": 0.20833979228026664, "grad_norm": 0.49321773648262024, "learning_rate": 6.945736434108528e-06, "loss": 0.4441, "step": 448 }, { "epoch": 0.20880483645946366, "grad_norm": 0.4817297160625458, "learning_rate": 6.961240310077519e-06, "loss": 0.4286, "step": 449 }, { "epoch": 0.20926988063866067, "grad_norm": 0.5427500605583191, "learning_rate": 6.976744186046513e-06, "loss": 0.4021, "step": 450 }, { "epoch": 0.2097349248178577, "grad_norm": 0.5691028237342834, "learning_rate": 6.992248062015504e-06, "loss": 0.4075, "step": 451 }, { "epoch": 0.21019996899705473, "grad_norm": 0.4561224579811096, "learning_rate": 7.007751937984497e-06, "loss": 0.4075, "step": 452 }, { "epoch": 0.21066501317625175, "grad_norm": 0.5992524027824402, "learning_rate": 7.023255813953489e-06, "loss": 0.4214, "step": 453 }, { "epoch": 0.21113005735544876, "grad_norm": 0.6167622208595276, "learning_rate": 7.038759689922481e-06, "loss": 0.3898, "step": 454 }, { "epoch": 0.21159510153464578, "grad_norm": 0.5301486253738403, "learning_rate": 7.054263565891474e-06, "loss": 0.4306, "step": 455 }, { "epoch": 0.21206014571384282, "grad_norm": 0.6576060652732849, "learning_rate": 7.069767441860466e-06, "loss": 0.4162, "step": 456 }, { "epoch": 0.21252518989303984, "grad_norm": 0.5479761958122253, "learning_rate": 7.085271317829458e-06, "loss": 0.4585, "step": 457 }, { "epoch": 0.21299023407223686, "grad_norm": 0.47175103425979614, "learning_rate": 7.1007751937984505e-06, "loss": 0.4103, "step": 458 }, { "epoch": 0.2134552782514339, "grad_norm": 0.5980085730552673, "learning_rate": 7.1162790697674425e-06, "loss": 0.4308, "step": 459 }, { "epoch": 0.21392032243063092, "grad_norm": 0.49691030383110046, "learning_rate": 7.131782945736435e-06, "loss": 0.4177, "step": 460 }, { "epoch": 0.21438536660982793, "grad_norm": 0.4692418575286865, "learning_rate": 7.147286821705427e-06, "loss": 0.4184, "step": 461 }, { "epoch": 0.21485041078902495, "grad_norm": 0.4812504053115845, "learning_rate": 7.1627906976744185e-06, "loss": 0.4077, "step": 462 }, { "epoch": 0.215315454968222, "grad_norm": 0.47342851758003235, "learning_rate": 7.178294573643411e-06, "loss": 0.4117, "step": 463 }, { "epoch": 0.215780499147419, "grad_norm": 0.5251373648643494, "learning_rate": 7.193798449612403e-06, "loss": 0.451, "step": 464 }, { "epoch": 0.21624554332661602, "grad_norm": 0.4466230869293213, "learning_rate": 7.209302325581395e-06, "loss": 0.4364, "step": 465 }, { "epoch": 0.21671058750581304, "grad_norm": 0.502120852470398, "learning_rate": 7.224806201550388e-06, "loss": 0.451, "step": 466 }, { "epoch": 0.21717563168501008, "grad_norm": 0.5081861615180969, "learning_rate": 7.24031007751938e-06, "loss": 0.4521, "step": 467 }, { "epoch": 0.2176406758642071, "grad_norm": 0.5037353038787842, "learning_rate": 7.255813953488373e-06, "loss": 0.4055, "step": 468 }, { "epoch": 0.21810572004340412, "grad_norm": 0.48872482776641846, "learning_rate": 7.271317829457365e-06, "loss": 0.4353, "step": 469 }, { "epoch": 0.21857076422260116, "grad_norm": 0.4901602268218994, "learning_rate": 7.286821705426357e-06, "loss": 0.4176, "step": 470 }, { "epoch": 0.21903580840179818, "grad_norm": 0.4682338535785675, "learning_rate": 7.30232558139535e-06, "loss": 0.394, "step": 471 }, { "epoch": 0.2195008525809952, "grad_norm": 0.5010263919830322, "learning_rate": 7.317829457364342e-06, "loss": 0.4417, "step": 472 }, { "epoch": 0.2199658967601922, "grad_norm": 0.4669908285140991, "learning_rate": 7.333333333333333e-06, "loss": 0.395, "step": 473 }, { "epoch": 0.22043094093938925, "grad_norm": 0.47183507680892944, "learning_rate": 7.348837209302326e-06, "loss": 0.4388, "step": 474 }, { "epoch": 0.22089598511858627, "grad_norm": 0.49007242918014526, "learning_rate": 7.364341085271318e-06, "loss": 0.4062, "step": 475 }, { "epoch": 0.22136102929778328, "grad_norm": 0.5357131361961365, "learning_rate": 7.379844961240311e-06, "loss": 0.4414, "step": 476 }, { "epoch": 0.2218260734769803, "grad_norm": 0.5643349289894104, "learning_rate": 7.395348837209303e-06, "loss": 0.4158, "step": 477 }, { "epoch": 0.22229111765617734, "grad_norm": 0.4729050099849701, "learning_rate": 7.410852713178295e-06, "loss": 0.4273, "step": 478 }, { "epoch": 0.22275616183537436, "grad_norm": 0.485498309135437, "learning_rate": 7.426356589147288e-06, "loss": 0.408, "step": 479 }, { "epoch": 0.22322120601457138, "grad_norm": 0.5941533446311951, "learning_rate": 7.44186046511628e-06, "loss": 0.4191, "step": 480 }, { "epoch": 0.22368625019376842, "grad_norm": 0.4895523488521576, "learning_rate": 7.457364341085272e-06, "loss": 0.4311, "step": 481 }, { "epoch": 0.22415129437296544, "grad_norm": 0.6243600249290466, "learning_rate": 7.472868217054265e-06, "loss": 0.4214, "step": 482 }, { "epoch": 0.22461633855216245, "grad_norm": 0.45703691244125366, "learning_rate": 7.488372093023256e-06, "loss": 0.4056, "step": 483 }, { "epoch": 0.22508138273135947, "grad_norm": 0.5641500353813171, "learning_rate": 7.5038759689922495e-06, "loss": 0.4293, "step": 484 }, { "epoch": 0.2255464269105565, "grad_norm": 0.5574773550033569, "learning_rate": 7.519379844961241e-06, "loss": 0.4195, "step": 485 }, { "epoch": 0.22601147108975353, "grad_norm": 0.5667125582695007, "learning_rate": 7.534883720930233e-06, "loss": 0.4228, "step": 486 }, { "epoch": 0.22647651526895055, "grad_norm": 0.4847676455974579, "learning_rate": 7.5503875968992255e-06, "loss": 0.4179, "step": 487 }, { "epoch": 0.22694155944814756, "grad_norm": 0.51683509349823, "learning_rate": 7.5658914728682175e-06, "loss": 0.4035, "step": 488 }, { "epoch": 0.2274066036273446, "grad_norm": 0.5952682495117188, "learning_rate": 7.5813953488372095e-06, "loss": 0.4308, "step": 489 }, { "epoch": 0.22787164780654162, "grad_norm": 0.5058827996253967, "learning_rate": 7.596899224806202e-06, "loss": 0.4276, "step": 490 }, { "epoch": 0.22833669198573864, "grad_norm": 0.7033687233924866, "learning_rate": 7.612403100775194e-06, "loss": 0.4345, "step": 491 }, { "epoch": 0.22880173616493568, "grad_norm": 0.5743710398674011, "learning_rate": 7.627906976744187e-06, "loss": 0.4216, "step": 492 }, { "epoch": 0.2292667803441327, "grad_norm": 0.637058675289154, "learning_rate": 7.643410852713178e-06, "loss": 0.439, "step": 493 }, { "epoch": 0.22973182452332971, "grad_norm": 0.49991941452026367, "learning_rate": 7.65891472868217e-06, "loss": 0.4383, "step": 494 }, { "epoch": 0.23019686870252673, "grad_norm": 0.6461667418479919, "learning_rate": 7.674418604651164e-06, "loss": 0.4542, "step": 495 }, { "epoch": 0.23066191288172377, "grad_norm": 0.5306419134140015, "learning_rate": 7.689922480620156e-06, "loss": 0.4266, "step": 496 }, { "epoch": 0.2311269570609208, "grad_norm": 0.5668182969093323, "learning_rate": 7.705426356589148e-06, "loss": 0.4369, "step": 497 }, { "epoch": 0.2315920012401178, "grad_norm": 0.546444833278656, "learning_rate": 7.72093023255814e-06, "loss": 0.4281, "step": 498 }, { "epoch": 0.23205704541931482, "grad_norm": 0.5376360416412354, "learning_rate": 7.736434108527132e-06, "loss": 0.4138, "step": 499 }, { "epoch": 0.23252208959851187, "grad_norm": 0.6407905220985413, "learning_rate": 7.751937984496126e-06, "loss": 0.4204, "step": 500 }, { "epoch": 0.23298713377770888, "grad_norm": 0.5134121179580688, "learning_rate": 7.767441860465116e-06, "loss": 0.4251, "step": 501 }, { "epoch": 0.2334521779569059, "grad_norm": 0.6588038206100464, "learning_rate": 7.782945736434108e-06, "loss": 0.423, "step": 502 }, { "epoch": 0.23391722213610294, "grad_norm": 0.5844888687133789, "learning_rate": 7.798449612403102e-06, "loss": 0.4362, "step": 503 }, { "epoch": 0.23438226631529996, "grad_norm": 0.5324265956878662, "learning_rate": 7.813953488372094e-06, "loss": 0.4184, "step": 504 }, { "epoch": 0.23484731049449697, "grad_norm": 0.6014962196350098, "learning_rate": 7.829457364341086e-06, "loss": 0.4314, "step": 505 }, { "epoch": 0.235312354673694, "grad_norm": 0.51766037940979, "learning_rate": 7.844961240310078e-06, "loss": 0.405, "step": 506 }, { "epoch": 0.23577739885289103, "grad_norm": 0.5190640091896057, "learning_rate": 7.86046511627907e-06, "loss": 0.4085, "step": 507 }, { "epoch": 0.23624244303208805, "grad_norm": 0.5045374631881714, "learning_rate": 7.875968992248064e-06, "loss": 0.4219, "step": 508 }, { "epoch": 0.23670748721128507, "grad_norm": 0.557181179523468, "learning_rate": 7.891472868217056e-06, "loss": 0.4026, "step": 509 }, { "epoch": 0.23717253139048208, "grad_norm": 0.5194858908653259, "learning_rate": 7.906976744186048e-06, "loss": 0.4232, "step": 510 }, { "epoch": 0.23763757556967913, "grad_norm": 0.5417995452880859, "learning_rate": 7.92248062015504e-06, "loss": 0.416, "step": 511 }, { "epoch": 0.23810261974887614, "grad_norm": 0.48320630192756653, "learning_rate": 7.937984496124032e-06, "loss": 0.3952, "step": 512 }, { "epoch": 0.23856766392807316, "grad_norm": 0.5941821336746216, "learning_rate": 7.953488372093024e-06, "loss": 0.4101, "step": 513 }, { "epoch": 0.2390327081072702, "grad_norm": 0.5255295634269714, "learning_rate": 7.968992248062016e-06, "loss": 0.4091, "step": 514 }, { "epoch": 0.23949775228646722, "grad_norm": 0.4808565378189087, "learning_rate": 7.984496124031008e-06, "loss": 0.4147, "step": 515 }, { "epoch": 0.23996279646566424, "grad_norm": 0.6180495023727417, "learning_rate": 8.000000000000001e-06, "loss": 0.4193, "step": 516 }, { "epoch": 0.24042784064486125, "grad_norm": 0.6234874725341797, "learning_rate": 8.015503875968993e-06, "loss": 0.4413, "step": 517 }, { "epoch": 0.2408928848240583, "grad_norm": 0.49461182951927185, "learning_rate": 8.031007751937985e-06, "loss": 0.3924, "step": 518 }, { "epoch": 0.2413579290032553, "grad_norm": 0.6341349482536316, "learning_rate": 8.046511627906977e-06, "loss": 0.4284, "step": 519 }, { "epoch": 0.24182297318245233, "grad_norm": 0.5244817137718201, "learning_rate": 8.06201550387597e-06, "loss": 0.4037, "step": 520 }, { "epoch": 0.24228801736164934, "grad_norm": 0.5345357656478882, "learning_rate": 8.077519379844961e-06, "loss": 0.4568, "step": 521 }, { "epoch": 0.2427530615408464, "grad_norm": 0.685580849647522, "learning_rate": 8.093023255813955e-06, "loss": 0.4431, "step": 522 }, { "epoch": 0.2432181057200434, "grad_norm": 0.5502837896347046, "learning_rate": 8.108527131782945e-06, "loss": 0.4197, "step": 523 }, { "epoch": 0.24368314989924042, "grad_norm": 0.5113924741744995, "learning_rate": 8.124031007751939e-06, "loss": 0.4108, "step": 524 }, { "epoch": 0.24414819407843746, "grad_norm": 0.5599501729011536, "learning_rate": 8.139534883720931e-06, "loss": 0.4374, "step": 525 }, { "epoch": 0.24461323825763448, "grad_norm": 0.5084413886070251, "learning_rate": 8.155038759689923e-06, "loss": 0.4296, "step": 526 }, { "epoch": 0.2450782824368315, "grad_norm": 0.49575719237327576, "learning_rate": 8.170542635658915e-06, "loss": 0.4252, "step": 527 }, { "epoch": 0.2455433266160285, "grad_norm": 0.4909055233001709, "learning_rate": 8.186046511627907e-06, "loss": 0.4206, "step": 528 }, { "epoch": 0.24600837079522556, "grad_norm": 0.5348622798919678, "learning_rate": 8.201550387596899e-06, "loss": 0.4373, "step": 529 }, { "epoch": 0.24647341497442257, "grad_norm": 0.41662895679473877, "learning_rate": 8.217054263565893e-06, "loss": 0.4021, "step": 530 }, { "epoch": 0.2469384591536196, "grad_norm": 0.5400405526161194, "learning_rate": 8.232558139534885e-06, "loss": 0.4104, "step": 531 }, { "epoch": 0.2474035033328166, "grad_norm": 0.4757499694824219, "learning_rate": 8.248062015503877e-06, "loss": 0.4034, "step": 532 }, { "epoch": 0.24786854751201365, "grad_norm": 0.49683964252471924, "learning_rate": 8.263565891472869e-06, "loss": 0.4145, "step": 533 }, { "epoch": 0.24833359169121066, "grad_norm": 0.529530942440033, "learning_rate": 8.279069767441861e-06, "loss": 0.4188, "step": 534 }, { "epoch": 0.24879863587040768, "grad_norm": 0.5073703527450562, "learning_rate": 8.294573643410853e-06, "loss": 0.4071, "step": 535 }, { "epoch": 0.24926368004960472, "grad_norm": 0.5136784315109253, "learning_rate": 8.310077519379845e-06, "loss": 0.4178, "step": 536 }, { "epoch": 0.24972872422880174, "grad_norm": 0.4692307412624359, "learning_rate": 8.325581395348837e-06, "loss": 0.4155, "step": 537 }, { "epoch": 0.2501937684079988, "grad_norm": 0.5519499778747559, "learning_rate": 8.34108527131783e-06, "loss": 0.4088, "step": 538 }, { "epoch": 0.2506588125871958, "grad_norm": 0.5323079228401184, "learning_rate": 8.356589147286823e-06, "loss": 0.3887, "step": 539 }, { "epoch": 0.2511238567663928, "grad_norm": 0.5130917429924011, "learning_rate": 8.372093023255815e-06, "loss": 0.4473, "step": 540 }, { "epoch": 0.25158890094558983, "grad_norm": 0.5379475951194763, "learning_rate": 8.387596899224807e-06, "loss": 0.4092, "step": 541 }, { "epoch": 0.25205394512478685, "grad_norm": 0.4682493507862091, "learning_rate": 8.403100775193799e-06, "loss": 0.407, "step": 542 }, { "epoch": 0.25251898930398387, "grad_norm": 0.5762583017349243, "learning_rate": 8.418604651162792e-06, "loss": 0.4402, "step": 543 }, { "epoch": 0.2529840334831809, "grad_norm": 0.4663183093070984, "learning_rate": 8.434108527131784e-06, "loss": 0.4079, "step": 544 }, { "epoch": 0.25344907766237795, "grad_norm": 0.5599426627159119, "learning_rate": 8.449612403100775e-06, "loss": 0.4041, "step": 545 }, { "epoch": 0.25391412184157497, "grad_norm": 0.4949679374694824, "learning_rate": 8.465116279069768e-06, "loss": 0.4055, "step": 546 }, { "epoch": 0.254379166020772, "grad_norm": 0.4728296995162964, "learning_rate": 8.48062015503876e-06, "loss": 0.4044, "step": 547 }, { "epoch": 0.254844210199969, "grad_norm": 0.5103499889373779, "learning_rate": 8.496124031007752e-06, "loss": 0.4158, "step": 548 }, { "epoch": 0.255309254379166, "grad_norm": 0.5350103974342346, "learning_rate": 8.511627906976744e-06, "loss": 0.4333, "step": 549 }, { "epoch": 0.25577429855836303, "grad_norm": 0.47756704688072205, "learning_rate": 8.527131782945736e-06, "loss": 0.4273, "step": 550 }, { "epoch": 0.25623934273756005, "grad_norm": 0.6039769649505615, "learning_rate": 8.54263565891473e-06, "loss": 0.447, "step": 551 }, { "epoch": 0.25670438691675707, "grad_norm": 0.5659769177436829, "learning_rate": 8.558139534883722e-06, "loss": 0.4298, "step": 552 }, { "epoch": 0.25716943109595414, "grad_norm": 0.46599307656288147, "learning_rate": 8.573643410852714e-06, "loss": 0.3993, "step": 553 }, { "epoch": 0.25763447527515115, "grad_norm": 0.5073681473731995, "learning_rate": 8.589147286821706e-06, "loss": 0.426, "step": 554 }, { "epoch": 0.25809951945434817, "grad_norm": 0.4540097117424011, "learning_rate": 8.604651162790698e-06, "loss": 0.4197, "step": 555 }, { "epoch": 0.2585645636335452, "grad_norm": 0.46219104528427124, "learning_rate": 8.620155038759692e-06, "loss": 0.4181, "step": 556 }, { "epoch": 0.2590296078127422, "grad_norm": 0.5334506034851074, "learning_rate": 8.635658914728682e-06, "loss": 0.4244, "step": 557 }, { "epoch": 0.2594946519919392, "grad_norm": 0.6203073859214783, "learning_rate": 8.651162790697674e-06, "loss": 0.4125, "step": 558 }, { "epoch": 0.25995969617113623, "grad_norm": 0.5533168315887451, "learning_rate": 8.666666666666668e-06, "loss": 0.4251, "step": 559 }, { "epoch": 0.2604247403503333, "grad_norm": 0.5441173911094666, "learning_rate": 8.68217054263566e-06, "loss": 0.4062, "step": 560 }, { "epoch": 0.2608897845295303, "grad_norm": 0.6109142899513245, "learning_rate": 8.697674418604652e-06, "loss": 0.454, "step": 561 }, { "epoch": 0.26135482870872734, "grad_norm": 0.5405747890472412, "learning_rate": 8.713178294573644e-06, "loss": 0.418, "step": 562 }, { "epoch": 0.26181987288792435, "grad_norm": 0.48734837770462036, "learning_rate": 8.728682170542636e-06, "loss": 0.417, "step": 563 }, { "epoch": 0.26228491706712137, "grad_norm": 0.6338199973106384, "learning_rate": 8.74418604651163e-06, "loss": 0.3976, "step": 564 }, { "epoch": 0.2627499612463184, "grad_norm": 0.5382621884346008, "learning_rate": 8.759689922480622e-06, "loss": 0.446, "step": 565 }, { "epoch": 0.2632150054255154, "grad_norm": 0.4548230767250061, "learning_rate": 8.775193798449612e-06, "loss": 0.4227, "step": 566 }, { "epoch": 0.2636800496047125, "grad_norm": 0.6364620327949524, "learning_rate": 8.790697674418606e-06, "loss": 0.4114, "step": 567 }, { "epoch": 0.2641450937839095, "grad_norm": 0.6153837442398071, "learning_rate": 8.806201550387598e-06, "loss": 0.4259, "step": 568 }, { "epoch": 0.2646101379631065, "grad_norm": 0.5064429044723511, "learning_rate": 8.82170542635659e-06, "loss": 0.4256, "step": 569 }, { "epoch": 0.2650751821423035, "grad_norm": 0.6010052561759949, "learning_rate": 8.837209302325582e-06, "loss": 0.411, "step": 570 }, { "epoch": 0.26554022632150054, "grad_norm": 0.659740686416626, "learning_rate": 8.852713178294574e-06, "loss": 0.3959, "step": 571 }, { "epoch": 0.26600527050069755, "grad_norm": 0.548054039478302, "learning_rate": 8.868217054263567e-06, "loss": 0.4324, "step": 572 }, { "epoch": 0.26647031467989457, "grad_norm": 0.5880405306816101, "learning_rate": 8.88372093023256e-06, "loss": 0.3957, "step": 573 }, { "epoch": 0.2669353588590916, "grad_norm": 0.5269092321395874, "learning_rate": 8.899224806201551e-06, "loss": 0.4376, "step": 574 }, { "epoch": 0.26740040303828866, "grad_norm": 0.5422910451889038, "learning_rate": 8.914728682170543e-06, "loss": 0.3812, "step": 575 }, { "epoch": 0.2678654472174857, "grad_norm": 0.5918363332748413, "learning_rate": 8.930232558139535e-06, "loss": 0.4285, "step": 576 }, { "epoch": 0.2683304913966827, "grad_norm": 0.5592337250709534, "learning_rate": 8.945736434108527e-06, "loss": 0.3972, "step": 577 }, { "epoch": 0.2687955355758797, "grad_norm": 0.5338684916496277, "learning_rate": 8.961240310077521e-06, "loss": 0.4245, "step": 578 }, { "epoch": 0.2692605797550767, "grad_norm": 0.46233999729156494, "learning_rate": 8.976744186046511e-06, "loss": 0.416, "step": 579 }, { "epoch": 0.26972562393427374, "grad_norm": 0.6775352954864502, "learning_rate": 8.992248062015505e-06, "loss": 0.4152, "step": 580 }, { "epoch": 0.27019066811347076, "grad_norm": 0.6111975312232971, "learning_rate": 9.007751937984497e-06, "loss": 0.4496, "step": 581 }, { "epoch": 0.2706557122926678, "grad_norm": 0.4907594621181488, "learning_rate": 9.023255813953489e-06, "loss": 0.4145, "step": 582 }, { "epoch": 0.27112075647186484, "grad_norm": 0.6536867022514343, "learning_rate": 9.038759689922481e-06, "loss": 0.4421, "step": 583 }, { "epoch": 0.27158580065106186, "grad_norm": 0.4851461946964264, "learning_rate": 9.054263565891473e-06, "loss": 0.3856, "step": 584 }, { "epoch": 0.2720508448302589, "grad_norm": 0.552401602268219, "learning_rate": 9.069767441860465e-06, "loss": 0.4085, "step": 585 }, { "epoch": 0.2725158890094559, "grad_norm": 0.5714566111564636, "learning_rate": 9.085271317829459e-06, "loss": 0.3893, "step": 586 }, { "epoch": 0.2729809331886529, "grad_norm": 0.5839101076126099, "learning_rate": 9.10077519379845e-06, "loss": 0.4145, "step": 587 }, { "epoch": 0.2734459773678499, "grad_norm": 0.5246545076370239, "learning_rate": 9.116279069767443e-06, "loss": 0.4316, "step": 588 }, { "epoch": 0.273911021547047, "grad_norm": 0.49281787872314453, "learning_rate": 9.131782945736435e-06, "loss": 0.4272, "step": 589 }, { "epoch": 0.274376065726244, "grad_norm": 0.5275765657424927, "learning_rate": 9.147286821705427e-06, "loss": 0.4141, "step": 590 }, { "epoch": 0.27484110990544103, "grad_norm": 0.4734654724597931, "learning_rate": 9.162790697674419e-06, "loss": 0.4031, "step": 591 }, { "epoch": 0.27530615408463804, "grad_norm": 0.5400969386100769, "learning_rate": 9.178294573643411e-06, "loss": 0.4517, "step": 592 }, { "epoch": 0.27577119826383506, "grad_norm": 0.4679478406906128, "learning_rate": 9.193798449612403e-06, "loss": 0.4014, "step": 593 }, { "epoch": 0.2762362424430321, "grad_norm": 0.6086196899414062, "learning_rate": 9.209302325581397e-06, "loss": 0.4185, "step": 594 }, { "epoch": 0.2767012866222291, "grad_norm": 0.5482898950576782, "learning_rate": 9.224806201550389e-06, "loss": 0.4391, "step": 595 }, { "epoch": 0.2771663308014261, "grad_norm": 0.5058215856552124, "learning_rate": 9.24031007751938e-06, "loss": 0.3826, "step": 596 }, { "epoch": 0.2776313749806232, "grad_norm": 0.5932140350341797, "learning_rate": 9.255813953488373e-06, "loss": 0.4105, "step": 597 }, { "epoch": 0.2780964191598202, "grad_norm": 0.4306205213069916, "learning_rate": 9.271317829457365e-06, "loss": 0.4083, "step": 598 }, { "epoch": 0.2785614633390172, "grad_norm": 0.4987516701221466, "learning_rate": 9.286821705426358e-06, "loss": 0.4078, "step": 599 }, { "epoch": 0.27902650751821423, "grad_norm": 0.5451740026473999, "learning_rate": 9.30232558139535e-06, "loss": 0.4184, "step": 600 }, { "epoch": 0.27949155169741124, "grad_norm": 0.52000892162323, "learning_rate": 9.31782945736434e-06, "loss": 0.422, "step": 601 }, { "epoch": 0.27995659587660826, "grad_norm": 0.524783194065094, "learning_rate": 9.333333333333334e-06, "loss": 0.4121, "step": 602 }, { "epoch": 0.2804216400558053, "grad_norm": 0.5225769877433777, "learning_rate": 9.348837209302326e-06, "loss": 0.4339, "step": 603 }, { "epoch": 0.28088668423500235, "grad_norm": 0.563931405544281, "learning_rate": 9.364341085271318e-06, "loss": 0.4212, "step": 604 }, { "epoch": 0.28135172841419936, "grad_norm": 0.6426302790641785, "learning_rate": 9.37984496124031e-06, "loss": 0.4, "step": 605 }, { "epoch": 0.2818167725933964, "grad_norm": 0.476382851600647, "learning_rate": 9.395348837209302e-06, "loss": 0.399, "step": 606 }, { "epoch": 0.2822818167725934, "grad_norm": 0.6564604640007019, "learning_rate": 9.410852713178296e-06, "loss": 0.4106, "step": 607 }, { "epoch": 0.2827468609517904, "grad_norm": 0.5618086457252502, "learning_rate": 9.426356589147288e-06, "loss": 0.4193, "step": 608 }, { "epoch": 0.28321190513098743, "grad_norm": 0.6813092231750488, "learning_rate": 9.44186046511628e-06, "loss": 0.4184, "step": 609 }, { "epoch": 0.28367694931018445, "grad_norm": 0.5115700960159302, "learning_rate": 9.457364341085272e-06, "loss": 0.4251, "step": 610 }, { "epoch": 0.2841419934893815, "grad_norm": 0.620830774307251, "learning_rate": 9.472868217054264e-06, "loss": 0.3938, "step": 611 }, { "epoch": 0.28460703766857853, "grad_norm": 0.4752020239830017, "learning_rate": 9.488372093023258e-06, "loss": 0.4005, "step": 612 }, { "epoch": 0.28507208184777555, "grad_norm": 0.5442911386489868, "learning_rate": 9.503875968992248e-06, "loss": 0.4185, "step": 613 }, { "epoch": 0.28553712602697257, "grad_norm": 0.5141138434410095, "learning_rate": 9.51937984496124e-06, "loss": 0.4421, "step": 614 }, { "epoch": 0.2860021702061696, "grad_norm": 0.5211554169654846, "learning_rate": 9.534883720930234e-06, "loss": 0.402, "step": 615 }, { "epoch": 0.2864672143853666, "grad_norm": 0.5350854396820068, "learning_rate": 9.550387596899226e-06, "loss": 0.4229, "step": 616 }, { "epoch": 0.2869322585645636, "grad_norm": 0.5418005585670471, "learning_rate": 9.565891472868218e-06, "loss": 0.4264, "step": 617 }, { "epoch": 0.28739730274376063, "grad_norm": 0.5651183128356934, "learning_rate": 9.58139534883721e-06, "loss": 0.412, "step": 618 }, { "epoch": 0.2878623469229577, "grad_norm": 0.4958003759384155, "learning_rate": 9.596899224806202e-06, "loss": 0.3929, "step": 619 }, { "epoch": 0.2883273911021547, "grad_norm": 0.6447129845619202, "learning_rate": 9.612403100775196e-06, "loss": 0.4177, "step": 620 }, { "epoch": 0.28879243528135173, "grad_norm": 0.5777575969696045, "learning_rate": 9.627906976744188e-06, "loss": 0.4193, "step": 621 }, { "epoch": 0.28925747946054875, "grad_norm": 0.7008224129676819, "learning_rate": 9.643410852713178e-06, "loss": 0.423, "step": 622 }, { "epoch": 0.28972252363974577, "grad_norm": 0.4426723122596741, "learning_rate": 9.658914728682172e-06, "loss": 0.4038, "step": 623 }, { "epoch": 0.2901875678189428, "grad_norm": 0.6383287310600281, "learning_rate": 9.674418604651164e-06, "loss": 0.4213, "step": 624 }, { "epoch": 0.2906526119981398, "grad_norm": 0.4844573438167572, "learning_rate": 9.689922480620156e-06, "loss": 0.4355, "step": 625 }, { "epoch": 0.29111765617733687, "grad_norm": 0.6763868927955627, "learning_rate": 9.705426356589148e-06, "loss": 0.4122, "step": 626 }, { "epoch": 0.2915827003565339, "grad_norm": 0.508306622505188, "learning_rate": 9.72093023255814e-06, "loss": 0.4067, "step": 627 }, { "epoch": 0.2920477445357309, "grad_norm": 0.5674280524253845, "learning_rate": 9.736434108527133e-06, "loss": 0.4359, "step": 628 }, { "epoch": 0.2925127887149279, "grad_norm": 0.5861987471580505, "learning_rate": 9.751937984496125e-06, "loss": 0.3948, "step": 629 }, { "epoch": 0.29297783289412493, "grad_norm": 0.5261598825454712, "learning_rate": 9.767441860465117e-06, "loss": 0.4339, "step": 630 }, { "epoch": 0.29344287707332195, "grad_norm": 0.4901929199695587, "learning_rate": 9.78294573643411e-06, "loss": 0.3916, "step": 631 }, { "epoch": 0.29390792125251897, "grad_norm": 0.5431584715843201, "learning_rate": 9.798449612403101e-06, "loss": 0.4372, "step": 632 }, { "epoch": 0.29437296543171604, "grad_norm": 0.4933854043483734, "learning_rate": 9.813953488372093e-06, "loss": 0.4107, "step": 633 }, { "epoch": 0.29483800961091305, "grad_norm": 0.5128764510154724, "learning_rate": 9.829457364341087e-06, "loss": 0.4222, "step": 634 }, { "epoch": 0.29530305379011007, "grad_norm": 0.577727198600769, "learning_rate": 9.844961240310077e-06, "loss": 0.4417, "step": 635 }, { "epoch": 0.2957680979693071, "grad_norm": 0.5306776762008667, "learning_rate": 9.860465116279071e-06, "loss": 0.4031, "step": 636 }, { "epoch": 0.2962331421485041, "grad_norm": 0.5434820055961609, "learning_rate": 9.875968992248063e-06, "loss": 0.4148, "step": 637 }, { "epoch": 0.2966981863277011, "grad_norm": 0.5308932065963745, "learning_rate": 9.891472868217055e-06, "loss": 0.4017, "step": 638 }, { "epoch": 0.29716323050689814, "grad_norm": 0.6192681193351746, "learning_rate": 9.906976744186047e-06, "loss": 0.4236, "step": 639 }, { "epoch": 0.2976282746860952, "grad_norm": 0.5245199799537659, "learning_rate": 9.922480620155039e-06, "loss": 0.4206, "step": 640 }, { "epoch": 0.2980933188652922, "grad_norm": 0.6172415018081665, "learning_rate": 9.937984496124031e-06, "loss": 0.4147, "step": 641 }, { "epoch": 0.29855836304448924, "grad_norm": 0.49070969223976135, "learning_rate": 9.953488372093025e-06, "loss": 0.4104, "step": 642 }, { "epoch": 0.29902340722368626, "grad_norm": 0.5000258684158325, "learning_rate": 9.968992248062017e-06, "loss": 0.4026, "step": 643 }, { "epoch": 0.29948845140288327, "grad_norm": 0.601202130317688, "learning_rate": 9.984496124031009e-06, "loss": 0.4187, "step": 644 }, { "epoch": 0.2999534955820803, "grad_norm": 0.5487759113311768, "learning_rate": 1e-05, "loss": 0.3904, "step": 645 }, { "epoch": 0.3004185397612773, "grad_norm": 0.4648313522338867, "learning_rate": 9.99999926779061e-06, "loss": 0.4065, "step": 646 }, { "epoch": 0.3008835839404743, "grad_norm": 0.542242705821991, "learning_rate": 9.999997071162647e-06, "loss": 0.403, "step": 647 }, { "epoch": 0.3013486281196714, "grad_norm": 0.5556986331939697, "learning_rate": 9.999993410116758e-06, "loss": 0.4012, "step": 648 }, { "epoch": 0.3018136722988684, "grad_norm": 0.4553111493587494, "learning_rate": 9.999988284654016e-06, "loss": 0.423, "step": 649 }, { "epoch": 0.3022787164780654, "grad_norm": 0.5509373545646667, "learning_rate": 9.999981694775921e-06, "loss": 0.4202, "step": 650 }, { "epoch": 0.30274376065726244, "grad_norm": 0.4689308702945709, "learning_rate": 9.999973640484402e-06, "loss": 0.3928, "step": 651 }, { "epoch": 0.30320880483645946, "grad_norm": 0.4984801411628723, "learning_rate": 9.99996412178182e-06, "loss": 0.4171, "step": 652 }, { "epoch": 0.30367384901565647, "grad_norm": 0.5575225949287415, "learning_rate": 9.999953138670961e-06, "loss": 0.4134, "step": 653 }, { "epoch": 0.3041388931948535, "grad_norm": 0.5620846748352051, "learning_rate": 9.999940691155043e-06, "loss": 0.4043, "step": 654 }, { "epoch": 0.30460393737405056, "grad_norm": 0.5309882760047913, "learning_rate": 9.999926779237713e-06, "loss": 0.4286, "step": 655 }, { "epoch": 0.3050689815532476, "grad_norm": 0.5248701572418213, "learning_rate": 9.999911402923043e-06, "loss": 0.4163, "step": 656 }, { "epoch": 0.3055340257324446, "grad_norm": 0.5015712380409241, "learning_rate": 9.999894562215538e-06, "loss": 0.4136, "step": 657 }, { "epoch": 0.3059990699116416, "grad_norm": 0.546489953994751, "learning_rate": 9.999876257120127e-06, "loss": 0.4217, "step": 658 }, { "epoch": 0.3064641140908386, "grad_norm": 0.4728710651397705, "learning_rate": 9.999856487642177e-06, "loss": 0.3868, "step": 659 }, { "epoch": 0.30692915827003564, "grad_norm": 0.5765323638916016, "learning_rate": 9.999835253787472e-06, "loss": 0.4518, "step": 660 }, { "epoch": 0.30739420244923266, "grad_norm": 0.5118497014045715, "learning_rate": 9.999812555562239e-06, "loss": 0.432, "step": 661 }, { "epoch": 0.30785924662842973, "grad_norm": 0.5273929238319397, "learning_rate": 9.999788392973117e-06, "loss": 0.3831, "step": 662 }, { "epoch": 0.30832429080762674, "grad_norm": 0.5201013088226318, "learning_rate": 9.99976276602719e-06, "loss": 0.4127, "step": 663 }, { "epoch": 0.30878933498682376, "grad_norm": 0.6540030837059021, "learning_rate": 9.999735674731959e-06, "loss": 0.4359, "step": 664 }, { "epoch": 0.3092543791660208, "grad_norm": 0.5233065485954285, "learning_rate": 9.999707119095361e-06, "loss": 0.428, "step": 665 }, { "epoch": 0.3097194233452178, "grad_norm": 0.5521329641342163, "learning_rate": 9.99967709912576e-06, "loss": 0.4054, "step": 666 }, { "epoch": 0.3101844675244148, "grad_norm": 0.6145597696304321, "learning_rate": 9.999645614831946e-06, "loss": 0.3983, "step": 667 }, { "epoch": 0.3106495117036118, "grad_norm": 0.6071444153785706, "learning_rate": 9.999612666223139e-06, "loss": 0.4286, "step": 668 }, { "epoch": 0.31111455588280884, "grad_norm": 0.5950531363487244, "learning_rate": 9.999578253308994e-06, "loss": 0.4325, "step": 669 }, { "epoch": 0.3115796000620059, "grad_norm": 0.608697772026062, "learning_rate": 9.99954237609959e-06, "loss": 0.3965, "step": 670 }, { "epoch": 0.31204464424120293, "grad_norm": 0.5764162540435791, "learning_rate": 9.999505034605428e-06, "loss": 0.4166, "step": 671 }, { "epoch": 0.31250968842039994, "grad_norm": 0.6588258147239685, "learning_rate": 9.999466228837452e-06, "loss": 0.4122, "step": 672 }, { "epoch": 0.31297473259959696, "grad_norm": 0.5878240466117859, "learning_rate": 9.999425958807023e-06, "loss": 0.4289, "step": 673 }, { "epoch": 0.313439776778794, "grad_norm": 0.6487173438072205, "learning_rate": 9.999384224525938e-06, "loss": 0.4348, "step": 674 }, { "epoch": 0.313904820957991, "grad_norm": 0.5158074498176575, "learning_rate": 9.99934102600642e-06, "loss": 0.402, "step": 675 }, { "epoch": 0.314369865137188, "grad_norm": 0.6373001933097839, "learning_rate": 9.999296363261118e-06, "loss": 0.4098, "step": 676 }, { "epoch": 0.3148349093163851, "grad_norm": 0.5033225417137146, "learning_rate": 9.999250236303118e-06, "loss": 0.4035, "step": 677 }, { "epoch": 0.3152999534955821, "grad_norm": 0.4822746217250824, "learning_rate": 9.999202645145927e-06, "loss": 0.4164, "step": 678 }, { "epoch": 0.3157649976747791, "grad_norm": 0.4495120644569397, "learning_rate": 9.999153589803484e-06, "loss": 0.3747, "step": 679 }, { "epoch": 0.31623004185397613, "grad_norm": 0.5534586310386658, "learning_rate": 9.999103070290155e-06, "loss": 0.4011, "step": 680 }, { "epoch": 0.31669508603317315, "grad_norm": 0.4584212601184845, "learning_rate": 9.999051086620738e-06, "loss": 0.4116, "step": 681 }, { "epoch": 0.31716013021237016, "grad_norm": 0.6077730655670166, "learning_rate": 9.998997638810462e-06, "loss": 0.4163, "step": 682 }, { "epoch": 0.3176251743915672, "grad_norm": 0.4868007004261017, "learning_rate": 9.998942726874974e-06, "loss": 0.4075, "step": 683 }, { "epoch": 0.31809021857076425, "grad_norm": 0.5131934285163879, "learning_rate": 9.99888635083036e-06, "loss": 0.4178, "step": 684 }, { "epoch": 0.31855526274996127, "grad_norm": 0.5666374564170837, "learning_rate": 9.998828510693133e-06, "loss": 0.4147, "step": 685 }, { "epoch": 0.3190203069291583, "grad_norm": 0.6439737677574158, "learning_rate": 9.99876920648023e-06, "loss": 0.4148, "step": 686 }, { "epoch": 0.3194853511083553, "grad_norm": 0.5670461058616638, "learning_rate": 9.998708438209022e-06, "loss": 0.4088, "step": 687 }, { "epoch": 0.3199503952875523, "grad_norm": 0.6633548736572266, "learning_rate": 9.99864620589731e-06, "loss": 0.3982, "step": 688 }, { "epoch": 0.32041543946674933, "grad_norm": 0.6031489968299866, "learning_rate": 9.998582509563315e-06, "loss": 0.4095, "step": 689 }, { "epoch": 0.32088048364594635, "grad_norm": 0.49488380551338196, "learning_rate": 9.998517349225698e-06, "loss": 0.3967, "step": 690 }, { "epoch": 0.32134552782514336, "grad_norm": 0.6333311200141907, "learning_rate": 9.99845072490354e-06, "loss": 0.4425, "step": 691 }, { "epoch": 0.32181057200434043, "grad_norm": 0.5096359252929688, "learning_rate": 9.998382636616355e-06, "loss": 0.4033, "step": 692 }, { "epoch": 0.32227561618353745, "grad_norm": 0.502375602722168, "learning_rate": 9.998313084384086e-06, "loss": 0.3965, "step": 693 }, { "epoch": 0.32274066036273447, "grad_norm": 0.6580330729484558, "learning_rate": 9.998242068227103e-06, "loss": 0.4052, "step": 694 }, { "epoch": 0.3232057045419315, "grad_norm": 0.5036015510559082, "learning_rate": 9.998169588166204e-06, "loss": 0.4154, "step": 695 }, { "epoch": 0.3236707487211285, "grad_norm": 0.5556278228759766, "learning_rate": 9.99809564422262e-06, "loss": 0.4296, "step": 696 }, { "epoch": 0.3241357929003255, "grad_norm": 0.635485053062439, "learning_rate": 9.998020236418008e-06, "loss": 0.4379, "step": 697 }, { "epoch": 0.32460083707952253, "grad_norm": 0.48889610171318054, "learning_rate": 9.997943364774451e-06, "loss": 0.414, "step": 698 }, { "epoch": 0.3250658812587196, "grad_norm": 0.4967147707939148, "learning_rate": 9.997865029314464e-06, "loss": 0.4011, "step": 699 }, { "epoch": 0.3255309254379166, "grad_norm": 0.595470666885376, "learning_rate": 9.997785230060993e-06, "loss": 0.438, "step": 700 }, { "epoch": 0.32599596961711363, "grad_norm": 0.533093273639679, "learning_rate": 9.997703967037406e-06, "loss": 0.4152, "step": 701 }, { "epoch": 0.32646101379631065, "grad_norm": 0.5301925539970398, "learning_rate": 9.997621240267505e-06, "loss": 0.4096, "step": 702 }, { "epoch": 0.32692605797550767, "grad_norm": 0.5256016254425049, "learning_rate": 9.997537049775522e-06, "loss": 0.4057, "step": 703 }, { "epoch": 0.3273911021547047, "grad_norm": 0.6002234816551208, "learning_rate": 9.997451395586112e-06, "loss": 0.4221, "step": 704 }, { "epoch": 0.3278561463339017, "grad_norm": 0.5671415328979492, "learning_rate": 9.997364277724362e-06, "loss": 0.4038, "step": 705 }, { "epoch": 0.32832119051309877, "grad_norm": 0.5628596544265747, "learning_rate": 9.997275696215788e-06, "loss": 0.3987, "step": 706 }, { "epoch": 0.3287862346922958, "grad_norm": 0.6205607652664185, "learning_rate": 9.997185651086336e-06, "loss": 0.4259, "step": 707 }, { "epoch": 0.3292512788714928, "grad_norm": 0.5086963176727295, "learning_rate": 9.997094142362376e-06, "loss": 0.4084, "step": 708 }, { "epoch": 0.3297163230506898, "grad_norm": 0.5511040091514587, "learning_rate": 9.99700117007071e-06, "loss": 0.4085, "step": 709 }, { "epoch": 0.33018136722988684, "grad_norm": 0.5336337089538574, "learning_rate": 9.996906734238568e-06, "loss": 0.4397, "step": 710 }, { "epoch": 0.33064641140908385, "grad_norm": 0.5333123207092285, "learning_rate": 9.99681083489361e-06, "loss": 0.4234, "step": 711 }, { "epoch": 0.33111145558828087, "grad_norm": 0.5140913724899292, "learning_rate": 9.99671347206392e-06, "loss": 0.4133, "step": 712 }, { "epoch": 0.3315764997674779, "grad_norm": 0.5568680167198181, "learning_rate": 9.996614645778019e-06, "loss": 0.4144, "step": 713 }, { "epoch": 0.33204154394667496, "grad_norm": 0.4299158751964569, "learning_rate": 9.996514356064848e-06, "loss": 0.4325, "step": 714 }, { "epoch": 0.33250658812587197, "grad_norm": 0.5466892123222351, "learning_rate": 9.996412602953782e-06, "loss": 0.4002, "step": 715 }, { "epoch": 0.332971632305069, "grad_norm": 0.5300124287605286, "learning_rate": 9.99630938647462e-06, "loss": 0.4113, "step": 716 }, { "epoch": 0.333436676484266, "grad_norm": 0.4665888845920563, "learning_rate": 9.996204706657597e-06, "loss": 0.4264, "step": 717 }, { "epoch": 0.333901720663463, "grad_norm": 0.6327192187309265, "learning_rate": 9.99609856353337e-06, "loss": 0.4139, "step": 718 }, { "epoch": 0.33436676484266004, "grad_norm": 0.5009886622428894, "learning_rate": 9.995990957133024e-06, "loss": 0.3818, "step": 719 }, { "epoch": 0.33483180902185705, "grad_norm": 0.5028725266456604, "learning_rate": 9.99588188748808e-06, "loss": 0.3934, "step": 720 }, { "epoch": 0.3352968532010541, "grad_norm": 0.6734301447868347, "learning_rate": 9.995771354630476e-06, "loss": 0.4213, "step": 721 }, { "epoch": 0.33576189738025114, "grad_norm": 0.5146905183792114, "learning_rate": 9.995659358592592e-06, "loss": 0.387, "step": 722 }, { "epoch": 0.33622694155944816, "grad_norm": 0.4680449664592743, "learning_rate": 9.995545899407226e-06, "loss": 0.4296, "step": 723 }, { "epoch": 0.33669198573864517, "grad_norm": 0.5770846605300903, "learning_rate": 9.995430977107612e-06, "loss": 0.4088, "step": 724 }, { "epoch": 0.3371570299178422, "grad_norm": 0.5241069793701172, "learning_rate": 9.995314591727404e-06, "loss": 0.4065, "step": 725 }, { "epoch": 0.3376220740970392, "grad_norm": 0.46266067028045654, "learning_rate": 9.995196743300693e-06, "loss": 0.4014, "step": 726 }, { "epoch": 0.3380871182762362, "grad_norm": 0.4953598380088806, "learning_rate": 9.995077431861992e-06, "loss": 0.3707, "step": 727 }, { "epoch": 0.3385521624554333, "grad_norm": 0.47356897592544556, "learning_rate": 9.994956657446248e-06, "loss": 0.4058, "step": 728 }, { "epoch": 0.3390172066346303, "grad_norm": 0.47377198934555054, "learning_rate": 9.994834420088832e-06, "loss": 0.4042, "step": 729 }, { "epoch": 0.3394822508138273, "grad_norm": 0.5034839510917664, "learning_rate": 9.99471071982555e-06, "loss": 0.4001, "step": 730 }, { "epoch": 0.33994729499302434, "grad_norm": 0.462456613779068, "learning_rate": 9.994585556692624e-06, "loss": 0.3885, "step": 731 }, { "epoch": 0.34041233917222136, "grad_norm": 0.5245911478996277, "learning_rate": 9.994458930726717e-06, "loss": 0.419, "step": 732 }, { "epoch": 0.3408773833514184, "grad_norm": 0.4660412669181824, "learning_rate": 9.994330841964916e-06, "loss": 0.4144, "step": 733 }, { "epoch": 0.3413424275306154, "grad_norm": 0.54203861951828, "learning_rate": 9.994201290444734e-06, "loss": 0.3974, "step": 734 }, { "epoch": 0.3418074717098124, "grad_norm": 0.5267844200134277, "learning_rate": 9.994070276204115e-06, "loss": 0.428, "step": 735 }, { "epoch": 0.3422725158890095, "grad_norm": 0.5129567384719849, "learning_rate": 9.993937799281435e-06, "loss": 0.4135, "step": 736 }, { "epoch": 0.3427375600682065, "grad_norm": 0.5932128429412842, "learning_rate": 9.993803859715488e-06, "loss": 0.4103, "step": 737 }, { "epoch": 0.3432026042474035, "grad_norm": 0.48848265409469604, "learning_rate": 9.993668457545505e-06, "loss": 0.4022, "step": 738 }, { "epoch": 0.3436676484266005, "grad_norm": 0.5327843427658081, "learning_rate": 9.993531592811146e-06, "loss": 0.4113, "step": 739 }, { "epoch": 0.34413269260579754, "grad_norm": 0.571151077747345, "learning_rate": 9.993393265552494e-06, "loss": 0.4191, "step": 740 }, { "epoch": 0.34459773678499456, "grad_norm": 0.5364315509796143, "learning_rate": 9.993253475810061e-06, "loss": 0.4134, "step": 741 }, { "epoch": 0.3450627809641916, "grad_norm": 0.5578524470329285, "learning_rate": 9.993112223624793e-06, "loss": 0.4269, "step": 742 }, { "epoch": 0.34552782514338864, "grad_norm": 0.599855899810791, "learning_rate": 9.992969509038057e-06, "loss": 0.4095, "step": 743 }, { "epoch": 0.34599286932258566, "grad_norm": 0.48594430088996887, "learning_rate": 9.992825332091654e-06, "loss": 0.4119, "step": 744 }, { "epoch": 0.3464579135017827, "grad_norm": 0.6377593874931335, "learning_rate": 9.992679692827812e-06, "loss": 0.4411, "step": 745 }, { "epoch": 0.3469229576809797, "grad_norm": 0.5200589895248413, "learning_rate": 9.992532591289183e-06, "loss": 0.4241, "step": 746 }, { "epoch": 0.3473880018601767, "grad_norm": 0.5711746215820312, "learning_rate": 9.992384027518853e-06, "loss": 0.422, "step": 747 }, { "epoch": 0.3478530460393737, "grad_norm": 0.5109982490539551, "learning_rate": 9.992234001560333e-06, "loss": 0.3909, "step": 748 }, { "epoch": 0.34831809021857074, "grad_norm": 0.6026611328125, "learning_rate": 9.992082513457564e-06, "loss": 0.4277, "step": 749 }, { "epoch": 0.3487831343977678, "grad_norm": 0.5200704336166382, "learning_rate": 9.991929563254913e-06, "loss": 0.4007, "step": 750 }, { "epoch": 0.34924817857696483, "grad_norm": 0.5779603123664856, "learning_rate": 9.99177515099718e-06, "loss": 0.3991, "step": 751 }, { "epoch": 0.34971322275616185, "grad_norm": 0.5946140885353088, "learning_rate": 9.991619276729585e-06, "loss": 0.439, "step": 752 }, { "epoch": 0.35017826693535886, "grad_norm": 0.5260143280029297, "learning_rate": 9.991461940497786e-06, "loss": 0.4202, "step": 753 }, { "epoch": 0.3506433111145559, "grad_norm": 0.5628990530967712, "learning_rate": 9.99130314234786e-06, "loss": 0.3997, "step": 754 }, { "epoch": 0.3511083552937529, "grad_norm": 0.6147162318229675, "learning_rate": 9.99114288232632e-06, "loss": 0.4137, "step": 755 }, { "epoch": 0.3515733994729499, "grad_norm": 0.5871340036392212, "learning_rate": 9.990981160480098e-06, "loss": 0.4044, "step": 756 }, { "epoch": 0.3520384436521469, "grad_norm": 0.5283817648887634, "learning_rate": 9.990817976856566e-06, "loss": 0.3815, "step": 757 }, { "epoch": 0.352503487831344, "grad_norm": 0.5750249624252319, "learning_rate": 9.990653331503515e-06, "loss": 0.4088, "step": 758 }, { "epoch": 0.352968532010541, "grad_norm": 0.5326053500175476, "learning_rate": 9.990487224469167e-06, "loss": 0.4239, "step": 759 }, { "epoch": 0.35343357618973803, "grad_norm": 0.50933837890625, "learning_rate": 9.990319655802171e-06, "loss": 0.4022, "step": 760 }, { "epoch": 0.35389862036893505, "grad_norm": 0.4641541838645935, "learning_rate": 9.990150625551609e-06, "loss": 0.3847, "step": 761 }, { "epoch": 0.35436366454813206, "grad_norm": 0.49445950984954834, "learning_rate": 9.989980133766983e-06, "loss": 0.395, "step": 762 }, { "epoch": 0.3548287087273291, "grad_norm": 0.5578946471214294, "learning_rate": 9.989808180498229e-06, "loss": 0.4031, "step": 763 }, { "epoch": 0.3552937529065261, "grad_norm": 0.47986650466918945, "learning_rate": 9.98963476579571e-06, "loss": 0.4222, "step": 764 }, { "epoch": 0.35575879708572317, "grad_norm": 0.5039169788360596, "learning_rate": 9.989459889710214e-06, "loss": 0.3973, "step": 765 }, { "epoch": 0.3562238412649202, "grad_norm": 0.47546157240867615, "learning_rate": 9.98928355229296e-06, "loss": 0.3891, "step": 766 }, { "epoch": 0.3566888854441172, "grad_norm": 0.4875746965408325, "learning_rate": 9.989105753595599e-06, "loss": 0.3952, "step": 767 }, { "epoch": 0.3571539296233142, "grad_norm": 0.5113381743431091, "learning_rate": 9.988926493670198e-06, "loss": 0.3624, "step": 768 }, { "epoch": 0.35761897380251123, "grad_norm": 0.4663374722003937, "learning_rate": 9.988745772569266e-06, "loss": 0.417, "step": 769 }, { "epoch": 0.35808401798170825, "grad_norm": 0.5435273051261902, "learning_rate": 9.988563590345728e-06, "loss": 0.4112, "step": 770 }, { "epoch": 0.35854906216090526, "grad_norm": 0.46149757504463196, "learning_rate": 9.988379947052944e-06, "loss": 0.3915, "step": 771 }, { "epoch": 0.35901410634010233, "grad_norm": 0.5259143710136414, "learning_rate": 9.988194842744701e-06, "loss": 0.4186, "step": 772 }, { "epoch": 0.35947915051929935, "grad_norm": 0.4799407422542572, "learning_rate": 9.988008277475214e-06, "loss": 0.4059, "step": 773 }, { "epoch": 0.35994419469849637, "grad_norm": 0.5095851421356201, "learning_rate": 9.987820251299121e-06, "loss": 0.4179, "step": 774 }, { "epoch": 0.3604092388776934, "grad_norm": 0.4954157769680023, "learning_rate": 9.987630764271497e-06, "loss": 0.3825, "step": 775 }, { "epoch": 0.3608742830568904, "grad_norm": 0.5038895010948181, "learning_rate": 9.987439816447836e-06, "loss": 0.4293, "step": 776 }, { "epoch": 0.3613393272360874, "grad_norm": 0.5088568925857544, "learning_rate": 9.987247407884064e-06, "loss": 0.4113, "step": 777 }, { "epoch": 0.36180437141528443, "grad_norm": 0.5863829851150513, "learning_rate": 9.987053538636535e-06, "loss": 0.3904, "step": 778 }, { "epoch": 0.3622694155944815, "grad_norm": 0.44420042634010315, "learning_rate": 9.986858208762032e-06, "loss": 0.4165, "step": 779 }, { "epoch": 0.3627344597736785, "grad_norm": 0.5741595029830933, "learning_rate": 9.986661418317759e-06, "loss": 0.4164, "step": 780 }, { "epoch": 0.36319950395287554, "grad_norm": 0.5108224749565125, "learning_rate": 9.986463167361358e-06, "loss": 0.4025, "step": 781 }, { "epoch": 0.36366454813207255, "grad_norm": 0.6276097893714905, "learning_rate": 9.986263455950888e-06, "loss": 0.4264, "step": 782 }, { "epoch": 0.36412959231126957, "grad_norm": 0.5375624895095825, "learning_rate": 9.986062284144848e-06, "loss": 0.4184, "step": 783 }, { "epoch": 0.3645946364904666, "grad_norm": 0.6147474050521851, "learning_rate": 9.985859652002152e-06, "loss": 0.3896, "step": 784 }, { "epoch": 0.3650596806696636, "grad_norm": 0.586616039276123, "learning_rate": 9.985655559582152e-06, "loss": 0.4102, "step": 785 }, { "epoch": 0.3655247248488606, "grad_norm": 0.5644299387931824, "learning_rate": 9.985450006944621e-06, "loss": 0.3909, "step": 786 }, { "epoch": 0.3659897690280577, "grad_norm": 0.5941767692565918, "learning_rate": 9.98524299414976e-06, "loss": 0.4318, "step": 787 }, { "epoch": 0.3664548132072547, "grad_norm": 0.5848608613014221, "learning_rate": 9.985034521258206e-06, "loss": 0.4178, "step": 788 }, { "epoch": 0.3669198573864517, "grad_norm": 0.5143994688987732, "learning_rate": 9.98482458833101e-06, "loss": 0.395, "step": 789 }, { "epoch": 0.36738490156564874, "grad_norm": 0.560663104057312, "learning_rate": 9.984613195429662e-06, "loss": 0.4118, "step": 790 }, { "epoch": 0.36784994574484575, "grad_norm": 0.520053505897522, "learning_rate": 9.984400342616076e-06, "loss": 0.3849, "step": 791 }, { "epoch": 0.36831498992404277, "grad_norm": 0.5158041715621948, "learning_rate": 9.984186029952591e-06, "loss": 0.4338, "step": 792 }, { "epoch": 0.3687800341032398, "grad_norm": 0.5674142837524414, "learning_rate": 9.983970257501978e-06, "loss": 0.3954, "step": 793 }, { "epoch": 0.36924507828243686, "grad_norm": 0.4826439321041107, "learning_rate": 9.983753025327431e-06, "loss": 0.3815, "step": 794 }, { "epoch": 0.36971012246163387, "grad_norm": 0.4967092275619507, "learning_rate": 9.983534333492575e-06, "loss": 0.41, "step": 795 }, { "epoch": 0.3701751666408309, "grad_norm": 0.5571979284286499, "learning_rate": 9.983314182061461e-06, "loss": 0.4217, "step": 796 }, { "epoch": 0.3706402108200279, "grad_norm": 0.5594668984413147, "learning_rate": 9.983092571098569e-06, "loss": 0.3869, "step": 797 }, { "epoch": 0.3711052549992249, "grad_norm": 0.50605309009552, "learning_rate": 9.982869500668804e-06, "loss": 0.435, "step": 798 }, { "epoch": 0.37157029917842194, "grad_norm": 0.6257908344268799, "learning_rate": 9.982644970837499e-06, "loss": 0.408, "step": 799 }, { "epoch": 0.37203534335761895, "grad_norm": 0.5521469712257385, "learning_rate": 9.982418981670414e-06, "loss": 0.4362, "step": 800 }, { "epoch": 0.372500387536816, "grad_norm": 0.6889628171920776, "learning_rate": 9.982191533233742e-06, "loss": 0.4091, "step": 801 }, { "epoch": 0.37296543171601304, "grad_norm": 0.5120216608047485, "learning_rate": 9.981962625594094e-06, "loss": 0.4025, "step": 802 }, { "epoch": 0.37343047589521006, "grad_norm": 0.6625473499298096, "learning_rate": 9.981732258818519e-06, "loss": 0.4212, "step": 803 }, { "epoch": 0.3738955200744071, "grad_norm": 0.5750365853309631, "learning_rate": 9.981500432974482e-06, "loss": 0.4146, "step": 804 }, { "epoch": 0.3743605642536041, "grad_norm": 0.5535095930099487, "learning_rate": 9.981267148129884e-06, "loss": 0.4175, "step": 805 }, { "epoch": 0.3748256084328011, "grad_norm": 0.6257491111755371, "learning_rate": 9.981032404353052e-06, "loss": 0.3994, "step": 806 }, { "epoch": 0.3752906526119981, "grad_norm": 0.48537763953208923, "learning_rate": 9.980796201712734e-06, "loss": 0.4095, "step": 807 }, { "epoch": 0.37575569679119514, "grad_norm": 0.5952275395393372, "learning_rate": 9.980558540278113e-06, "loss": 0.4032, "step": 808 }, { "epoch": 0.3762207409703922, "grad_norm": 0.5576409101486206, "learning_rate": 9.980319420118796e-06, "loss": 0.4195, "step": 809 }, { "epoch": 0.3766857851495892, "grad_norm": 0.5643303990364075, "learning_rate": 9.980078841304817e-06, "loss": 0.426, "step": 810 }, { "epoch": 0.37715082932878624, "grad_norm": 0.5821006894111633, "learning_rate": 9.979836803906636e-06, "loss": 0.4227, "step": 811 }, { "epoch": 0.37761587350798326, "grad_norm": 0.46532806754112244, "learning_rate": 9.979593307995145e-06, "loss": 0.3683, "step": 812 }, { "epoch": 0.3780809176871803, "grad_norm": 0.534476637840271, "learning_rate": 9.979348353641659e-06, "loss": 0.4112, "step": 813 }, { "epoch": 0.3785459618663773, "grad_norm": 0.5859639644622803, "learning_rate": 9.979101940917918e-06, "loss": 0.4257, "step": 814 }, { "epoch": 0.3790110060455743, "grad_norm": 0.4468208849430084, "learning_rate": 9.978854069896096e-06, "loss": 0.4216, "step": 815 }, { "epoch": 0.3794760502247714, "grad_norm": 0.597046434879303, "learning_rate": 9.97860474064879e-06, "loss": 0.4121, "step": 816 }, { "epoch": 0.3799410944039684, "grad_norm": 0.5563921332359314, "learning_rate": 9.978353953249023e-06, "loss": 0.4184, "step": 817 }, { "epoch": 0.3804061385831654, "grad_norm": 0.49227768182754517, "learning_rate": 9.978101707770247e-06, "loss": 0.4037, "step": 818 }, { "epoch": 0.3808711827623624, "grad_norm": 0.48585137724876404, "learning_rate": 9.977848004286342e-06, "loss": 0.3985, "step": 819 }, { "epoch": 0.38133622694155944, "grad_norm": 0.500493586063385, "learning_rate": 9.977592842871612e-06, "loss": 0.4261, "step": 820 }, { "epoch": 0.38180127112075646, "grad_norm": 0.5106586217880249, "learning_rate": 9.97733622360079e-06, "loss": 0.4301, "step": 821 }, { "epoch": 0.3822663152999535, "grad_norm": 0.4543416500091553, "learning_rate": 9.977078146549036e-06, "loss": 0.4127, "step": 822 }, { "epoch": 0.38273135947915055, "grad_norm": 0.6294241547584534, "learning_rate": 9.976818611791937e-06, "loss": 0.4152, "step": 823 }, { "epoch": 0.38319640365834756, "grad_norm": 0.5176417827606201, "learning_rate": 9.976557619405503e-06, "loss": 0.4029, "step": 824 }, { "epoch": 0.3836614478375446, "grad_norm": 0.527394711971283, "learning_rate": 9.97629516946618e-06, "loss": 0.4007, "step": 825 }, { "epoch": 0.3841264920167416, "grad_norm": 0.5030791163444519, "learning_rate": 9.976031262050832e-06, "loss": 0.3884, "step": 826 }, { "epoch": 0.3845915361959386, "grad_norm": 0.5006067156791687, "learning_rate": 9.975765897236754e-06, "loss": 0.4097, "step": 827 }, { "epoch": 0.3850565803751356, "grad_norm": 0.5824684500694275, "learning_rate": 9.975499075101667e-06, "loss": 0.4203, "step": 828 }, { "epoch": 0.38552162455433264, "grad_norm": 0.48303505778312683, "learning_rate": 9.975230795723717e-06, "loss": 0.3836, "step": 829 }, { "epoch": 0.38598666873352966, "grad_norm": 0.5456617474555969, "learning_rate": 9.974961059181482e-06, "loss": 0.427, "step": 830 }, { "epoch": 0.38645171291272673, "grad_norm": 0.4804321229457855, "learning_rate": 9.97468986555396e-06, "loss": 0.409, "step": 831 }, { "epoch": 0.38691675709192375, "grad_norm": 0.4723159372806549, "learning_rate": 9.974417214920584e-06, "loss": 0.4049, "step": 832 }, { "epoch": 0.38738180127112076, "grad_norm": 0.5413050651550293, "learning_rate": 9.974143107361205e-06, "loss": 0.4057, "step": 833 }, { "epoch": 0.3878468454503178, "grad_norm": 0.5091968774795532, "learning_rate": 9.973867542956104e-06, "loss": 0.4015, "step": 834 }, { "epoch": 0.3883118896295148, "grad_norm": 0.4703305959701538, "learning_rate": 9.973590521785992e-06, "loss": 0.4009, "step": 835 }, { "epoch": 0.3887769338087118, "grad_norm": 0.5493623614311218, "learning_rate": 9.973312043932004e-06, "loss": 0.424, "step": 836 }, { "epoch": 0.3892419779879088, "grad_norm": 0.590499997138977, "learning_rate": 9.9730321094757e-06, "loss": 0.4294, "step": 837 }, { "epoch": 0.3897070221671059, "grad_norm": 0.4965055584907532, "learning_rate": 9.972750718499067e-06, "loss": 0.3899, "step": 838 }, { "epoch": 0.3901720663463029, "grad_norm": 0.5021485686302185, "learning_rate": 9.972467871084524e-06, "loss": 0.4507, "step": 839 }, { "epoch": 0.39063711052549993, "grad_norm": 0.5755704045295715, "learning_rate": 9.97218356731491e-06, "loss": 0.3978, "step": 840 }, { "epoch": 0.39110215470469695, "grad_norm": 0.5343739986419678, "learning_rate": 9.971897807273492e-06, "loss": 0.3725, "step": 841 }, { "epoch": 0.39156719888389396, "grad_norm": 0.5048686265945435, "learning_rate": 9.971610591043966e-06, "loss": 0.4254, "step": 842 }, { "epoch": 0.392032243063091, "grad_norm": 0.5324344038963318, "learning_rate": 9.971321918710452e-06, "loss": 0.3935, "step": 843 }, { "epoch": 0.392497287242288, "grad_norm": 0.6189853549003601, "learning_rate": 9.9710317903575e-06, "loss": 0.4212, "step": 844 }, { "epoch": 0.39296233142148507, "grad_norm": 0.48477646708488464, "learning_rate": 9.97074020607008e-06, "loss": 0.4003, "step": 845 }, { "epoch": 0.3934273756006821, "grad_norm": 0.548075795173645, "learning_rate": 9.970447165933594e-06, "loss": 0.4232, "step": 846 }, { "epoch": 0.3938924197798791, "grad_norm": 0.5070050954818726, "learning_rate": 9.97015267003387e-06, "loss": 0.4053, "step": 847 }, { "epoch": 0.3943574639590761, "grad_norm": 0.5388278365135193, "learning_rate": 9.96985671845716e-06, "loss": 0.4132, "step": 848 }, { "epoch": 0.39482250813827313, "grad_norm": 0.5621599555015564, "learning_rate": 9.969559311290142e-06, "loss": 0.4126, "step": 849 }, { "epoch": 0.39528755231747015, "grad_norm": 0.500983476638794, "learning_rate": 9.969260448619925e-06, "loss": 0.4072, "step": 850 }, { "epoch": 0.39575259649666716, "grad_norm": 0.5636142492294312, "learning_rate": 9.968960130534036e-06, "loss": 0.3949, "step": 851 }, { "epoch": 0.3962176406758642, "grad_norm": 0.5257579684257507, "learning_rate": 9.96865835712044e-06, "loss": 0.3993, "step": 852 }, { "epoch": 0.39668268485506125, "grad_norm": 0.6351518630981445, "learning_rate": 9.968355128467515e-06, "loss": 0.4175, "step": 853 }, { "epoch": 0.39714772903425827, "grad_norm": 0.4981622099876404, "learning_rate": 9.968050444664074e-06, "loss": 0.4217, "step": 854 }, { "epoch": 0.3976127732134553, "grad_norm": 0.5462387800216675, "learning_rate": 9.967744305799358e-06, "loss": 0.4094, "step": 855 }, { "epoch": 0.3980778173926523, "grad_norm": 0.599955141544342, "learning_rate": 9.967436711963026e-06, "loss": 0.3897, "step": 856 }, { "epoch": 0.3985428615718493, "grad_norm": 0.4772481918334961, "learning_rate": 9.967127663245167e-06, "loss": 0.426, "step": 857 }, { "epoch": 0.39900790575104633, "grad_norm": 0.6691299676895142, "learning_rate": 9.966817159736295e-06, "loss": 0.3885, "step": 858 }, { "epoch": 0.39947294993024335, "grad_norm": 0.6346452832221985, "learning_rate": 9.966505201527357e-06, "loss": 0.4217, "step": 859 }, { "epoch": 0.3999379941094404, "grad_norm": 0.47586584091186523, "learning_rate": 9.966191788709716e-06, "loss": 0.4165, "step": 860 }, { "epoch": 0.40040303828863744, "grad_norm": 0.6300751566886902, "learning_rate": 9.965876921375165e-06, "loss": 0.4087, "step": 861 }, { "epoch": 0.40086808246783445, "grad_norm": 0.5367612242698669, "learning_rate": 9.965560599615928e-06, "loss": 0.3919, "step": 862 }, { "epoch": 0.40133312664703147, "grad_norm": 0.4811550974845886, "learning_rate": 9.965242823524648e-06, "loss": 0.4112, "step": 863 }, { "epoch": 0.4017981708262285, "grad_norm": 0.6218395233154297, "learning_rate": 9.964923593194394e-06, "loss": 0.3741, "step": 864 }, { "epoch": 0.4022632150054255, "grad_norm": 0.5514636635780334, "learning_rate": 9.964602908718667e-06, "loss": 0.4253, "step": 865 }, { "epoch": 0.4027282591846225, "grad_norm": 0.5390025973320007, "learning_rate": 9.964280770191388e-06, "loss": 0.3988, "step": 866 }, { "epoch": 0.4031933033638196, "grad_norm": 0.64164799451828, "learning_rate": 9.963957177706908e-06, "loss": 0.4129, "step": 867 }, { "epoch": 0.4036583475430166, "grad_norm": 0.4808669984340668, "learning_rate": 9.96363213136e-06, "loss": 0.4205, "step": 868 }, { "epoch": 0.4041233917222136, "grad_norm": 0.5149733424186707, "learning_rate": 9.963305631245866e-06, "loss": 0.4049, "step": 869 }, { "epoch": 0.40458843590141064, "grad_norm": 0.5427307486534119, "learning_rate": 9.962977677460132e-06, "loss": 0.402, "step": 870 }, { "epoch": 0.40505348008060765, "grad_norm": 0.5210288763046265, "learning_rate": 9.96264827009885e-06, "loss": 0.4063, "step": 871 }, { "epoch": 0.40551852425980467, "grad_norm": 0.5169207453727722, "learning_rate": 9.962317409258501e-06, "loss": 0.4032, "step": 872 }, { "epoch": 0.4059835684390017, "grad_norm": 0.5519936084747314, "learning_rate": 9.961985095035987e-06, "loss": 0.3884, "step": 873 }, { "epoch": 0.4064486126181987, "grad_norm": 0.5302849411964417, "learning_rate": 9.961651327528636e-06, "loss": 0.406, "step": 874 }, { "epoch": 0.4069136567973958, "grad_norm": 0.5135921835899353, "learning_rate": 9.961316106834202e-06, "loss": 0.3885, "step": 875 }, { "epoch": 0.4073787009765928, "grad_norm": 0.4829360842704773, "learning_rate": 9.96097943305087e-06, "loss": 0.4054, "step": 876 }, { "epoch": 0.4078437451557898, "grad_norm": 0.48682668805122375, "learning_rate": 9.960641306277244e-06, "loss": 0.3872, "step": 877 }, { "epoch": 0.4083087893349868, "grad_norm": 0.5358735918998718, "learning_rate": 9.960301726612355e-06, "loss": 0.4106, "step": 878 }, { "epoch": 0.40877383351418384, "grad_norm": 0.491878867149353, "learning_rate": 9.959960694155662e-06, "loss": 0.4054, "step": 879 }, { "epoch": 0.40923887769338085, "grad_norm": 0.4896703362464905, "learning_rate": 9.959618209007045e-06, "loss": 0.3981, "step": 880 }, { "epoch": 0.40970392187257787, "grad_norm": 0.5020521283149719, "learning_rate": 9.959274271266816e-06, "loss": 0.3881, "step": 881 }, { "epoch": 0.41016896605177494, "grad_norm": 0.5467438101768494, "learning_rate": 9.958928881035708e-06, "loss": 0.4188, "step": 882 }, { "epoch": 0.41063401023097196, "grad_norm": 0.455102801322937, "learning_rate": 9.958582038414878e-06, "loss": 0.407, "step": 883 }, { "epoch": 0.411099054410169, "grad_norm": 0.4519009292125702, "learning_rate": 9.958233743505912e-06, "loss": 0.4008, "step": 884 }, { "epoch": 0.411564098589366, "grad_norm": 0.5280046463012695, "learning_rate": 9.957883996410821e-06, "loss": 0.3925, "step": 885 }, { "epoch": 0.412029142768563, "grad_norm": 0.4995780885219574, "learning_rate": 9.95753279723204e-06, "loss": 0.397, "step": 886 }, { "epoch": 0.41249418694776, "grad_norm": 0.4849075675010681, "learning_rate": 9.957180146072426e-06, "loss": 0.4214, "step": 887 }, { "epoch": 0.41295923112695704, "grad_norm": 0.548945426940918, "learning_rate": 9.956826043035268e-06, "loss": 0.4044, "step": 888 }, { "epoch": 0.4134242753061541, "grad_norm": 0.5245530009269714, "learning_rate": 9.956470488224278e-06, "loss": 0.4133, "step": 889 }, { "epoch": 0.4138893194853511, "grad_norm": 0.5736802816390991, "learning_rate": 9.95611348174359e-06, "loss": 0.3939, "step": 890 }, { "epoch": 0.41435436366454814, "grad_norm": 0.5248284935951233, "learning_rate": 9.955755023697767e-06, "loss": 0.4058, "step": 891 }, { "epoch": 0.41481940784374516, "grad_norm": 0.4462113678455353, "learning_rate": 9.955395114191792e-06, "loss": 0.4168, "step": 892 }, { "epoch": 0.4152844520229422, "grad_norm": 0.6396908164024353, "learning_rate": 9.955033753331082e-06, "loss": 0.4049, "step": 893 }, { "epoch": 0.4157494962021392, "grad_norm": 0.5714189410209656, "learning_rate": 9.954670941221469e-06, "loss": 0.41, "step": 894 }, { "epoch": 0.4162145403813362, "grad_norm": 0.5378727316856384, "learning_rate": 9.954306677969218e-06, "loss": 0.3946, "step": 895 }, { "epoch": 0.4166795845605333, "grad_norm": 0.5027511715888977, "learning_rate": 9.953940963681015e-06, "loss": 0.3757, "step": 896 }, { "epoch": 0.4171446287397303, "grad_norm": 0.5424126386642456, "learning_rate": 9.953573798463972e-06, "loss": 0.417, "step": 897 }, { "epoch": 0.4176096729189273, "grad_norm": 0.5236470103263855, "learning_rate": 9.953205182425623e-06, "loss": 0.415, "step": 898 }, { "epoch": 0.4180747170981243, "grad_norm": 0.5600041747093201, "learning_rate": 9.952835115673933e-06, "loss": 0.4227, "step": 899 }, { "epoch": 0.41853976127732134, "grad_norm": 0.44945916533470154, "learning_rate": 9.952463598317286e-06, "loss": 0.405, "step": 900 }, { "epoch": 0.41900480545651836, "grad_norm": 0.5260853171348572, "learning_rate": 9.952090630464495e-06, "loss": 0.4051, "step": 901 }, { "epoch": 0.4194698496357154, "grad_norm": 0.5023328065872192, "learning_rate": 9.951716212224798e-06, "loss": 0.408, "step": 902 }, { "epoch": 0.4199348938149124, "grad_norm": 0.5364269018173218, "learning_rate": 9.951340343707852e-06, "loss": 0.4036, "step": 903 }, { "epoch": 0.42039993799410946, "grad_norm": 0.5100162625312805, "learning_rate": 9.950963025023746e-06, "loss": 0.4104, "step": 904 }, { "epoch": 0.4208649821733065, "grad_norm": 0.5659197568893433, "learning_rate": 9.950584256282988e-06, "loss": 0.4216, "step": 905 }, { "epoch": 0.4213300263525035, "grad_norm": 0.5025163292884827, "learning_rate": 9.950204037596516e-06, "loss": 0.3989, "step": 906 }, { "epoch": 0.4217950705317005, "grad_norm": 0.6001001596450806, "learning_rate": 9.949822369075687e-06, "loss": 0.389, "step": 907 }, { "epoch": 0.4222601147108975, "grad_norm": 0.5134057998657227, "learning_rate": 9.949439250832287e-06, "loss": 0.3975, "step": 908 }, { "epoch": 0.42272515889009454, "grad_norm": 0.5745139122009277, "learning_rate": 9.949054682978525e-06, "loss": 0.4186, "step": 909 }, { "epoch": 0.42319020306929156, "grad_norm": 0.6479323506355286, "learning_rate": 9.948668665627034e-06, "loss": 0.4201, "step": 910 }, { "epoch": 0.42365524724848863, "grad_norm": 0.5262349843978882, "learning_rate": 9.948281198890875e-06, "loss": 0.4016, "step": 911 }, { "epoch": 0.42412029142768565, "grad_norm": 0.6013730764389038, "learning_rate": 9.947892282883527e-06, "loss": 0.3814, "step": 912 }, { "epoch": 0.42458533560688266, "grad_norm": 0.6294665336608887, "learning_rate": 9.947501917718897e-06, "loss": 0.391, "step": 913 }, { "epoch": 0.4250503797860797, "grad_norm": 0.5530297756195068, "learning_rate": 9.947110103511322e-06, "loss": 0.3957, "step": 914 }, { "epoch": 0.4255154239652767, "grad_norm": 0.5969755053520203, "learning_rate": 9.946716840375552e-06, "loss": 0.3933, "step": 915 }, { "epoch": 0.4259804681444737, "grad_norm": 0.6482888460159302, "learning_rate": 9.946322128426771e-06, "loss": 0.4384, "step": 916 }, { "epoch": 0.42644551232367073, "grad_norm": 0.6915735602378845, "learning_rate": 9.945925967780581e-06, "loss": 0.4165, "step": 917 }, { "epoch": 0.4269105565028678, "grad_norm": 0.5215064883232117, "learning_rate": 9.945528358553014e-06, "loss": 0.4123, "step": 918 }, { "epoch": 0.4273756006820648, "grad_norm": 0.6709214448928833, "learning_rate": 9.945129300860521e-06, "loss": 0.4214, "step": 919 }, { "epoch": 0.42784064486126183, "grad_norm": 0.5739949345588684, "learning_rate": 9.94472879481998e-06, "loss": 0.3663, "step": 920 }, { "epoch": 0.42830568904045885, "grad_norm": 0.5512825846672058, "learning_rate": 9.944326840548693e-06, "loss": 0.386, "step": 921 }, { "epoch": 0.42877073321965586, "grad_norm": 0.6003051400184631, "learning_rate": 9.943923438164387e-06, "loss": 0.4002, "step": 922 }, { "epoch": 0.4292357773988529, "grad_norm": 0.5427140593528748, "learning_rate": 9.943518587785208e-06, "loss": 0.3789, "step": 923 }, { "epoch": 0.4297008215780499, "grad_norm": 0.4838401675224304, "learning_rate": 9.943112289529737e-06, "loss": 0.4048, "step": 924 }, { "epoch": 0.4301658657572469, "grad_norm": 0.487448126077652, "learning_rate": 9.942704543516966e-06, "loss": 0.4082, "step": 925 }, { "epoch": 0.430630909936444, "grad_norm": 0.5517868995666504, "learning_rate": 9.94229534986632e-06, "loss": 0.415, "step": 926 }, { "epoch": 0.431095954115641, "grad_norm": 0.5240123867988586, "learning_rate": 9.941884708697644e-06, "loss": 0.3909, "step": 927 }, { "epoch": 0.431560998294838, "grad_norm": 0.5013271570205688, "learning_rate": 9.941472620131208e-06, "loss": 0.3914, "step": 928 }, { "epoch": 0.43202604247403503, "grad_norm": 0.49305182695388794, "learning_rate": 9.941059084287708e-06, "loss": 0.3725, "step": 929 }, { "epoch": 0.43249108665323205, "grad_norm": 0.5340526103973389, "learning_rate": 9.940644101288259e-06, "loss": 0.3961, "step": 930 }, { "epoch": 0.43295613083242906, "grad_norm": 0.5031039118766785, "learning_rate": 9.940227671254406e-06, "loss": 0.4108, "step": 931 }, { "epoch": 0.4334211750116261, "grad_norm": 0.5576218962669373, "learning_rate": 9.939809794308111e-06, "loss": 0.3985, "step": 932 }, { "epoch": 0.43388621919082315, "grad_norm": 0.48340535163879395, "learning_rate": 9.939390470571769e-06, "loss": 0.4029, "step": 933 }, { "epoch": 0.43435126337002017, "grad_norm": 0.5440919399261475, "learning_rate": 9.938969700168186e-06, "loss": 0.3998, "step": 934 }, { "epoch": 0.4348163075492172, "grad_norm": 0.44848132133483887, "learning_rate": 9.938547483220602e-06, "loss": 0.3841, "step": 935 }, { "epoch": 0.4352813517284142, "grad_norm": 0.5585298538208008, "learning_rate": 9.93812381985268e-06, "loss": 0.4138, "step": 936 }, { "epoch": 0.4357463959076112, "grad_norm": 0.5106562376022339, "learning_rate": 9.9376987101885e-06, "loss": 0.3974, "step": 937 }, { "epoch": 0.43621144008680823, "grad_norm": 0.5067592263221741, "learning_rate": 9.937272154352573e-06, "loss": 0.3889, "step": 938 }, { "epoch": 0.43667648426600525, "grad_norm": 0.5147528052330017, "learning_rate": 9.936844152469828e-06, "loss": 0.3935, "step": 939 }, { "epoch": 0.4371415284452023, "grad_norm": 0.48401129245758057, "learning_rate": 9.936414704665622e-06, "loss": 0.3967, "step": 940 }, { "epoch": 0.43760657262439934, "grad_norm": 0.6152265071868896, "learning_rate": 9.935983811065732e-06, "loss": 0.4419, "step": 941 }, { "epoch": 0.43807161680359635, "grad_norm": 0.4920692443847656, "learning_rate": 9.935551471796358e-06, "loss": 0.3831, "step": 942 }, { "epoch": 0.43853666098279337, "grad_norm": 0.4826490581035614, "learning_rate": 9.935117686984128e-06, "loss": 0.3911, "step": 943 }, { "epoch": 0.4390017051619904, "grad_norm": 0.5583831667900085, "learning_rate": 9.93468245675609e-06, "loss": 0.4013, "step": 944 }, { "epoch": 0.4394667493411874, "grad_norm": 0.5026276111602783, "learning_rate": 9.934245781239714e-06, "loss": 0.3954, "step": 945 }, { "epoch": 0.4399317935203844, "grad_norm": 0.6956951022148132, "learning_rate": 9.933807660562898e-06, "loss": 0.4083, "step": 946 }, { "epoch": 0.44039683769958143, "grad_norm": 0.43628400564193726, "learning_rate": 9.933368094853958e-06, "loss": 0.3529, "step": 947 }, { "epoch": 0.4408618818787785, "grad_norm": 0.5511034727096558, "learning_rate": 9.932927084241635e-06, "loss": 0.4084, "step": 948 }, { "epoch": 0.4413269260579755, "grad_norm": 0.5073305368423462, "learning_rate": 9.932484628855097e-06, "loss": 0.3879, "step": 949 }, { "epoch": 0.44179197023717254, "grad_norm": 0.5978685617446899, "learning_rate": 9.932040728823928e-06, "loss": 0.4192, "step": 950 }, { "epoch": 0.44225701441636955, "grad_norm": 0.5503034591674805, "learning_rate": 9.931595384278143e-06, "loss": 0.4073, "step": 951 }, { "epoch": 0.44272205859556657, "grad_norm": 0.5104547739028931, "learning_rate": 9.931148595348176e-06, "loss": 0.3689, "step": 952 }, { "epoch": 0.4431871027747636, "grad_norm": 0.594742476940155, "learning_rate": 9.93070036216488e-06, "loss": 0.418, "step": 953 }, { "epoch": 0.4436521469539606, "grad_norm": 0.4962625205516815, "learning_rate": 9.930250684859542e-06, "loss": 0.4008, "step": 954 }, { "epoch": 0.4441171911331577, "grad_norm": 0.5209368467330933, "learning_rate": 9.929799563563858e-06, "loss": 0.3777, "step": 955 }, { "epoch": 0.4445822353123547, "grad_norm": 0.5389686226844788, "learning_rate": 9.929346998409958e-06, "loss": 0.4047, "step": 956 }, { "epoch": 0.4450472794915517, "grad_norm": 0.5484662652015686, "learning_rate": 9.92889298953039e-06, "loss": 0.42, "step": 957 }, { "epoch": 0.4455123236707487, "grad_norm": 0.5775797963142395, "learning_rate": 9.928437537058126e-06, "loss": 0.4016, "step": 958 }, { "epoch": 0.44597736784994574, "grad_norm": 0.48525333404541016, "learning_rate": 9.927980641126562e-06, "loss": 0.385, "step": 959 }, { "epoch": 0.44644241202914275, "grad_norm": 0.6398960947990417, "learning_rate": 9.927522301869515e-06, "loss": 0.4008, "step": 960 }, { "epoch": 0.44690745620833977, "grad_norm": 0.5224960446357727, "learning_rate": 9.927062519421223e-06, "loss": 0.4061, "step": 961 }, { "epoch": 0.44737250038753684, "grad_norm": 0.5396710634231567, "learning_rate": 9.926601293916349e-06, "loss": 0.4128, "step": 962 }, { "epoch": 0.44783754456673386, "grad_norm": 0.540110170841217, "learning_rate": 9.926138625489981e-06, "loss": 0.4037, "step": 963 }, { "epoch": 0.4483025887459309, "grad_norm": 0.5284585952758789, "learning_rate": 9.925674514277625e-06, "loss": 0.3929, "step": 964 }, { "epoch": 0.4487676329251279, "grad_norm": 0.5301303863525391, "learning_rate": 9.925208960415214e-06, "loss": 0.4064, "step": 965 }, { "epoch": 0.4492326771043249, "grad_norm": 0.5405663847923279, "learning_rate": 9.924741964039098e-06, "loss": 0.3813, "step": 966 }, { "epoch": 0.4496977212835219, "grad_norm": 0.567683756351471, "learning_rate": 9.924273525286053e-06, "loss": 0.3801, "step": 967 }, { "epoch": 0.45016276546271894, "grad_norm": 0.5406811237335205, "learning_rate": 9.92380364429328e-06, "loss": 0.394, "step": 968 }, { "epoch": 0.45062780964191596, "grad_norm": 0.6923626065254211, "learning_rate": 9.923332321198396e-06, "loss": 0.389, "step": 969 }, { "epoch": 0.451092853821113, "grad_norm": 0.5620383620262146, "learning_rate": 9.922859556139447e-06, "loss": 0.393, "step": 970 }, { "epoch": 0.45155789800031004, "grad_norm": 0.5418940782546997, "learning_rate": 9.922385349254895e-06, "loss": 0.4071, "step": 971 }, { "epoch": 0.45202294217950706, "grad_norm": 0.5649824738502502, "learning_rate": 9.921909700683632e-06, "loss": 0.3882, "step": 972 }, { "epoch": 0.4524879863587041, "grad_norm": 0.5053907036781311, "learning_rate": 9.921432610564962e-06, "loss": 0.3893, "step": 973 }, { "epoch": 0.4529530305379011, "grad_norm": 0.5486416816711426, "learning_rate": 9.920954079038623e-06, "loss": 0.4207, "step": 974 }, { "epoch": 0.4534180747170981, "grad_norm": 0.4447232186794281, "learning_rate": 9.920474106244764e-06, "loss": 0.3837, "step": 975 }, { "epoch": 0.4538831188962951, "grad_norm": 0.610753059387207, "learning_rate": 9.919992692323965e-06, "loss": 0.3905, "step": 976 }, { "epoch": 0.4543481630754922, "grad_norm": 0.5709927678108215, "learning_rate": 9.919509837417221e-06, "loss": 0.3985, "step": 977 }, { "epoch": 0.4548132072546892, "grad_norm": 0.6064723134040833, "learning_rate": 9.919025541665955e-06, "loss": 0.4376, "step": 978 }, { "epoch": 0.4552782514338862, "grad_norm": 0.6571378111839294, "learning_rate": 9.918539805212008e-06, "loss": 0.3945, "step": 979 }, { "epoch": 0.45574329561308324, "grad_norm": 0.528195321559906, "learning_rate": 9.918052628197645e-06, "loss": 0.406, "step": 980 }, { "epoch": 0.45620833979228026, "grad_norm": 0.6316954493522644, "learning_rate": 9.917564010765551e-06, "loss": 0.3779, "step": 981 }, { "epoch": 0.4566733839714773, "grad_norm": 0.5615218281745911, "learning_rate": 9.917073953058836e-06, "loss": 0.4308, "step": 982 }, { "epoch": 0.4571384281506743, "grad_norm": 0.5495420098304749, "learning_rate": 9.916582455221029e-06, "loss": 0.4006, "step": 983 }, { "epoch": 0.45760347232987136, "grad_norm": 0.5130931735038757, "learning_rate": 9.916089517396081e-06, "loss": 0.3881, "step": 984 }, { "epoch": 0.4580685165090684, "grad_norm": 0.6450378894805908, "learning_rate": 9.915595139728366e-06, "loss": 0.4132, "step": 985 }, { "epoch": 0.4585335606882654, "grad_norm": 0.5673415064811707, "learning_rate": 9.915099322362681e-06, "loss": 0.4007, "step": 986 }, { "epoch": 0.4589986048674624, "grad_norm": 0.5420086979866028, "learning_rate": 9.91460206544424e-06, "loss": 0.4066, "step": 987 }, { "epoch": 0.45946364904665943, "grad_norm": 0.5733571648597717, "learning_rate": 9.914103369118682e-06, "loss": 0.371, "step": 988 }, { "epoch": 0.45992869322585644, "grad_norm": 0.5512123107910156, "learning_rate": 9.913603233532067e-06, "loss": 0.3742, "step": 989 }, { "epoch": 0.46039373740505346, "grad_norm": 0.5796908140182495, "learning_rate": 9.913101658830879e-06, "loss": 0.4115, "step": 990 }, { "epoch": 0.4608587815842505, "grad_norm": 0.5767021179199219, "learning_rate": 9.91259864516202e-06, "loss": 0.4203, "step": 991 }, { "epoch": 0.46132382576344755, "grad_norm": 0.5332590937614441, "learning_rate": 9.912094192672812e-06, "loss": 0.3986, "step": 992 }, { "epoch": 0.46178886994264456, "grad_norm": 0.4922131597995758, "learning_rate": 9.911588301511004e-06, "loss": 0.4085, "step": 993 }, { "epoch": 0.4622539141218416, "grad_norm": 0.5280721783638, "learning_rate": 9.911080971824762e-06, "loss": 0.4059, "step": 994 }, { "epoch": 0.4627189583010386, "grad_norm": 0.5713456869125366, "learning_rate": 9.910572203762676e-06, "loss": 0.4249, "step": 995 }, { "epoch": 0.4631840024802356, "grad_norm": 0.5211708545684814, "learning_rate": 9.910061997473753e-06, "loss": 0.3874, "step": 996 }, { "epoch": 0.46364904665943263, "grad_norm": 0.5675033330917358, "learning_rate": 9.909550353107426e-06, "loss": 0.4086, "step": 997 }, { "epoch": 0.46411409083862964, "grad_norm": 0.6571573615074158, "learning_rate": 9.909037270813547e-06, "loss": 0.4148, "step": 998 }, { "epoch": 0.4645791350178267, "grad_norm": 0.6051549911499023, "learning_rate": 9.908522750742391e-06, "loss": 0.4061, "step": 999 }, { "epoch": 0.46504417919702373, "grad_norm": 0.5821381211280823, "learning_rate": 9.90800679304465e-06, "loss": 0.4033, "step": 1000 }, { "epoch": 0.46550922337622075, "grad_norm": 0.6213275194168091, "learning_rate": 9.907489397871441e-06, "loss": 0.4018, "step": 1001 }, { "epoch": 0.46597426755541776, "grad_norm": 0.5967691540718079, "learning_rate": 9.9069705653743e-06, "loss": 0.376, "step": 1002 }, { "epoch": 0.4664393117346148, "grad_norm": 0.4699487090110779, "learning_rate": 9.906450295705188e-06, "loss": 0.4172, "step": 1003 }, { "epoch": 0.4669043559138118, "grad_norm": 0.6863399147987366, "learning_rate": 9.905928589016479e-06, "loss": 0.4105, "step": 1004 }, { "epoch": 0.4673694000930088, "grad_norm": 0.5886141657829285, "learning_rate": 9.905405445460972e-06, "loss": 0.3957, "step": 1005 }, { "epoch": 0.4678344442722059, "grad_norm": 0.4847775399684906, "learning_rate": 9.90488086519189e-06, "loss": 0.4155, "step": 1006 }, { "epoch": 0.4682994884514029, "grad_norm": 0.6540789604187012, "learning_rate": 9.904354848362876e-06, "loss": 0.4178, "step": 1007 }, { "epoch": 0.4687645326305999, "grad_norm": 0.5532976388931274, "learning_rate": 9.903827395127987e-06, "loss": 0.3777, "step": 1008 }, { "epoch": 0.46922957680979693, "grad_norm": 0.44451895356178284, "learning_rate": 9.903298505641707e-06, "loss": 0.4003, "step": 1009 }, { "epoch": 0.46969462098899395, "grad_norm": 0.5525593757629395, "learning_rate": 9.902768180058942e-06, "loss": 0.3992, "step": 1010 }, { "epoch": 0.47015966516819097, "grad_norm": 0.480257123708725, "learning_rate": 9.902236418535012e-06, "loss": 0.3906, "step": 1011 }, { "epoch": 0.470624709347388, "grad_norm": 0.5119292140007019, "learning_rate": 9.901703221225663e-06, "loss": 0.42, "step": 1012 }, { "epoch": 0.47108975352658505, "grad_norm": 0.500453531742096, "learning_rate": 9.901168588287057e-06, "loss": 0.4131, "step": 1013 }, { "epoch": 0.47155479770578207, "grad_norm": 0.5501608848571777, "learning_rate": 9.900632519875786e-06, "loss": 0.3866, "step": 1014 }, { "epoch": 0.4720198418849791, "grad_norm": 0.4848511219024658, "learning_rate": 9.900095016148849e-06, "loss": 0.4138, "step": 1015 }, { "epoch": 0.4724848860641761, "grad_norm": 0.5268184542655945, "learning_rate": 9.899556077263676e-06, "loss": 0.3947, "step": 1016 }, { "epoch": 0.4729499302433731, "grad_norm": 0.555121660232544, "learning_rate": 9.899015703378115e-06, "loss": 0.4129, "step": 1017 }, { "epoch": 0.47341497442257013, "grad_norm": 0.5795762538909912, "learning_rate": 9.898473894650425e-06, "loss": 0.4121, "step": 1018 }, { "epoch": 0.47388001860176715, "grad_norm": 0.515906810760498, "learning_rate": 9.8979306512393e-06, "loss": 0.3946, "step": 1019 }, { "epoch": 0.47434506278096417, "grad_norm": 0.5714851021766663, "learning_rate": 9.897385973303845e-06, "loss": 0.3858, "step": 1020 }, { "epoch": 0.47481010696016124, "grad_norm": 0.6117233037948608, "learning_rate": 9.896839861003588e-06, "loss": 0.411, "step": 1021 }, { "epoch": 0.47527515113935825, "grad_norm": 0.5009344220161438, "learning_rate": 9.896292314498475e-06, "loss": 0.3939, "step": 1022 }, { "epoch": 0.47574019531855527, "grad_norm": 0.46868664026260376, "learning_rate": 9.895743333948875e-06, "loss": 0.412, "step": 1023 }, { "epoch": 0.4762052394977523, "grad_norm": 0.6705508828163147, "learning_rate": 9.895192919515575e-06, "loss": 0.3981, "step": 1024 }, { "epoch": 0.4766702836769493, "grad_norm": 0.535101056098938, "learning_rate": 9.894641071359784e-06, "loss": 0.4057, "step": 1025 }, { "epoch": 0.4771353278561463, "grad_norm": 0.4733811616897583, "learning_rate": 9.894087789643123e-06, "loss": 0.4066, "step": 1026 }, { "epoch": 0.47760037203534333, "grad_norm": 0.514105498790741, "learning_rate": 9.893533074527647e-06, "loss": 0.3957, "step": 1027 }, { "epoch": 0.4780654162145404, "grad_norm": 0.50275719165802, "learning_rate": 9.892976926175819e-06, "loss": 0.386, "step": 1028 }, { "epoch": 0.4785304603937374, "grad_norm": 0.4467027187347412, "learning_rate": 9.892419344750528e-06, "loss": 0.3965, "step": 1029 }, { "epoch": 0.47899550457293444, "grad_norm": 0.48868438601493835, "learning_rate": 9.891860330415077e-06, "loss": 0.408, "step": 1030 }, { "epoch": 0.47946054875213145, "grad_norm": 0.4734288156032562, "learning_rate": 9.891299883333197e-06, "loss": 0.3778, "step": 1031 }, { "epoch": 0.47992559293132847, "grad_norm": 0.49330592155456543, "learning_rate": 9.890738003669029e-06, "loss": 0.3766, "step": 1032 }, { "epoch": 0.4803906371105255, "grad_norm": 0.5068243145942688, "learning_rate": 9.890174691587142e-06, "loss": 0.3832, "step": 1033 }, { "epoch": 0.4808556812897225, "grad_norm": 0.4672917127609253, "learning_rate": 9.889609947252519e-06, "loss": 0.3933, "step": 1034 }, { "epoch": 0.4813207254689196, "grad_norm": 0.5433964729309082, "learning_rate": 9.889043770830566e-06, "loss": 0.378, "step": 1035 }, { "epoch": 0.4817857696481166, "grad_norm": 0.4645717740058899, "learning_rate": 9.888476162487106e-06, "loss": 0.3803, "step": 1036 }, { "epoch": 0.4822508138273136, "grad_norm": 0.5179428458213806, "learning_rate": 9.887907122388382e-06, "loss": 0.4093, "step": 1037 }, { "epoch": 0.4827158580065106, "grad_norm": 0.5166782140731812, "learning_rate": 9.887336650701055e-06, "loss": 0.4107, "step": 1038 }, { "epoch": 0.48318090218570764, "grad_norm": 0.4443417489528656, "learning_rate": 9.886764747592212e-06, "loss": 0.3626, "step": 1039 }, { "epoch": 0.48364594636490466, "grad_norm": 0.5102760195732117, "learning_rate": 9.886191413229349e-06, "loss": 0.4014, "step": 1040 }, { "epoch": 0.48411099054410167, "grad_norm": 0.5018773674964905, "learning_rate": 9.885616647780389e-06, "loss": 0.3895, "step": 1041 }, { "epoch": 0.4845760347232987, "grad_norm": 0.5214076638221741, "learning_rate": 9.88504045141367e-06, "loss": 0.389, "step": 1042 }, { "epoch": 0.48504107890249576, "grad_norm": 0.4620245397090912, "learning_rate": 9.884462824297952e-06, "loss": 0.389, "step": 1043 }, { "epoch": 0.4855061230816928, "grad_norm": 0.4827862083911896, "learning_rate": 9.88388376660241e-06, "loss": 0.4153, "step": 1044 }, { "epoch": 0.4859711672608898, "grad_norm": 0.5498597025871277, "learning_rate": 9.883303278496646e-06, "loss": 0.4076, "step": 1045 }, { "epoch": 0.4864362114400868, "grad_norm": 0.49216824769973755, "learning_rate": 9.88272136015067e-06, "loss": 0.4254, "step": 1046 }, { "epoch": 0.4869012556192838, "grad_norm": 0.534773588180542, "learning_rate": 9.882138011734919e-06, "loss": 0.3948, "step": 1047 }, { "epoch": 0.48736629979848084, "grad_norm": 0.5610633492469788, "learning_rate": 9.881553233420244e-06, "loss": 0.41, "step": 1048 }, { "epoch": 0.48783134397767786, "grad_norm": 0.48247793316841125, "learning_rate": 9.88096702537792e-06, "loss": 0.394, "step": 1049 }, { "epoch": 0.4882963881568749, "grad_norm": 0.5383715033531189, "learning_rate": 9.880379387779637e-06, "loss": 0.3893, "step": 1050 }, { "epoch": 0.48876143233607194, "grad_norm": 0.5967577695846558, "learning_rate": 9.879790320797504e-06, "loss": 0.373, "step": 1051 }, { "epoch": 0.48922647651526896, "grad_norm": 0.4835599660873413, "learning_rate": 9.879199824604048e-06, "loss": 0.4082, "step": 1052 }, { "epoch": 0.489691520694466, "grad_norm": 0.5845022797584534, "learning_rate": 9.878607899372217e-06, "loss": 0.3954, "step": 1053 }, { "epoch": 0.490156564873663, "grad_norm": 0.49885934591293335, "learning_rate": 9.878014545275379e-06, "loss": 0.4067, "step": 1054 }, { "epoch": 0.49062160905286, "grad_norm": 0.5852560997009277, "learning_rate": 9.877419762487312e-06, "loss": 0.3932, "step": 1055 }, { "epoch": 0.491086653232057, "grad_norm": 0.48509690165519714, "learning_rate": 9.876823551182223e-06, "loss": 0.433, "step": 1056 }, { "epoch": 0.4915516974112541, "grad_norm": 0.44763508439064026, "learning_rate": 9.876225911534729e-06, "loss": 0.3651, "step": 1057 }, { "epoch": 0.4920167415904511, "grad_norm": 0.5484299063682556, "learning_rate": 9.875626843719871e-06, "loss": 0.3959, "step": 1058 }, { "epoch": 0.49248178576964813, "grad_norm": 0.481046199798584, "learning_rate": 9.875026347913109e-06, "loss": 0.3765, "step": 1059 }, { "epoch": 0.49294682994884514, "grad_norm": 0.5341517925262451, "learning_rate": 9.874424424290313e-06, "loss": 0.3963, "step": 1060 }, { "epoch": 0.49341187412804216, "grad_norm": 0.469200998544693, "learning_rate": 9.87382107302778e-06, "loss": 0.3995, "step": 1061 }, { "epoch": 0.4938769183072392, "grad_norm": 0.5132390260696411, "learning_rate": 9.87321629430222e-06, "loss": 0.3837, "step": 1062 }, { "epoch": 0.4943419624864362, "grad_norm": 0.5703272223472595, "learning_rate": 9.872610088290766e-06, "loss": 0.4025, "step": 1063 }, { "epoch": 0.4948070066656332, "grad_norm": 0.5358856916427612, "learning_rate": 9.87200245517096e-06, "loss": 0.4287, "step": 1064 }, { "epoch": 0.4952720508448303, "grad_norm": 0.41558536887168884, "learning_rate": 9.871393395120774e-06, "loss": 0.3621, "step": 1065 }, { "epoch": 0.4957370950240273, "grad_norm": 0.5119854807853699, "learning_rate": 9.870782908318591e-06, "loss": 0.3781, "step": 1066 }, { "epoch": 0.4962021392032243, "grad_norm": 0.4697974622249603, "learning_rate": 9.87017099494321e-06, "loss": 0.4138, "step": 1067 }, { "epoch": 0.49666718338242133, "grad_norm": 0.4632170796394348, "learning_rate": 9.869557655173849e-06, "loss": 0.3952, "step": 1068 }, { "epoch": 0.49713222756161835, "grad_norm": 0.4312477111816406, "learning_rate": 9.86894288919015e-06, "loss": 0.396, "step": 1069 }, { "epoch": 0.49759727174081536, "grad_norm": 0.47128409147262573, "learning_rate": 9.868326697172164e-06, "loss": 0.4233, "step": 1070 }, { "epoch": 0.4980623159200124, "grad_norm": 0.5045310258865356, "learning_rate": 9.867709079300366e-06, "loss": 0.3823, "step": 1071 }, { "epoch": 0.49852736009920945, "grad_norm": 0.486145943403244, "learning_rate": 9.867090035755648e-06, "loss": 0.3714, "step": 1072 }, { "epoch": 0.49899240427840647, "grad_norm": 0.4772482216358185, "learning_rate": 9.866469566719314e-06, "loss": 0.3799, "step": 1073 }, { "epoch": 0.4994574484576035, "grad_norm": 0.546289324760437, "learning_rate": 9.86584767237309e-06, "loss": 0.4263, "step": 1074 }, { "epoch": 0.4999224926368005, "grad_norm": 0.5823156833648682, "learning_rate": 9.86522435289912e-06, "loss": 0.4101, "step": 1075 }, { "epoch": 0.5003875368159976, "grad_norm": 0.5479342341423035, "learning_rate": 9.864599608479963e-06, "loss": 0.3717, "step": 1076 }, { "epoch": 0.5008525809951946, "grad_norm": 0.5319987535476685, "learning_rate": 9.863973439298597e-06, "loss": 0.3993, "step": 1077 }, { "epoch": 0.5013176251743916, "grad_norm": 0.5460782647132874, "learning_rate": 9.86334584553842e-06, "loss": 0.4069, "step": 1078 }, { "epoch": 0.5017826693535886, "grad_norm": 0.6070926189422607, "learning_rate": 9.862716827383238e-06, "loss": 0.3999, "step": 1079 }, { "epoch": 0.5022477135327856, "grad_norm": 0.530796229839325, "learning_rate": 9.862086385017283e-06, "loss": 0.3975, "step": 1080 }, { "epoch": 0.5027127577119826, "grad_norm": 0.4481984078884125, "learning_rate": 9.861454518625202e-06, "loss": 0.3962, "step": 1081 }, { "epoch": 0.5031778018911797, "grad_norm": 0.4798201024532318, "learning_rate": 9.86082122839206e-06, "loss": 0.3859, "step": 1082 }, { "epoch": 0.5036428460703767, "grad_norm": 0.5404430627822876, "learning_rate": 9.86018651450333e-06, "loss": 0.3999, "step": 1083 }, { "epoch": 0.5041078902495737, "grad_norm": 0.4909595549106598, "learning_rate": 9.85955037714492e-06, "loss": 0.3951, "step": 1084 }, { "epoch": 0.5045729344287707, "grad_norm": 0.5814231634140015, "learning_rate": 9.858912816503136e-06, "loss": 0.4005, "step": 1085 }, { "epoch": 0.5050379786079677, "grad_norm": 0.5166906118392944, "learning_rate": 9.858273832764712e-06, "loss": 0.4184, "step": 1086 }, { "epoch": 0.5055030227871647, "grad_norm": 0.5268685221672058, "learning_rate": 9.8576334261168e-06, "loss": 0.4257, "step": 1087 }, { "epoch": 0.5059680669663618, "grad_norm": 0.4857880473136902, "learning_rate": 9.856991596746957e-06, "loss": 0.3995, "step": 1088 }, { "epoch": 0.5064331111455588, "grad_norm": 0.6406244039535522, "learning_rate": 9.85634834484317e-06, "loss": 0.4282, "step": 1089 }, { "epoch": 0.5068981553247559, "grad_norm": 0.5598058104515076, "learning_rate": 9.855703670593834e-06, "loss": 0.3901, "step": 1090 }, { "epoch": 0.5073631995039529, "grad_norm": 0.5567111372947693, "learning_rate": 9.855057574187766e-06, "loss": 0.4043, "step": 1091 }, { "epoch": 0.5078282436831499, "grad_norm": 0.534076452255249, "learning_rate": 9.854410055814195e-06, "loss": 0.4011, "step": 1092 }, { "epoch": 0.508293287862347, "grad_norm": 0.42775899171829224, "learning_rate": 9.85376111566277e-06, "loss": 0.3986, "step": 1093 }, { "epoch": 0.508758332041544, "grad_norm": 0.5820823907852173, "learning_rate": 9.853110753923553e-06, "loss": 0.4143, "step": 1094 }, { "epoch": 0.509223376220741, "grad_norm": 0.46398279070854187, "learning_rate": 9.852458970787027e-06, "loss": 0.3816, "step": 1095 }, { "epoch": 0.509688420399938, "grad_norm": 0.4692523777484894, "learning_rate": 9.85180576644409e-06, "loss": 0.3855, "step": 1096 }, { "epoch": 0.510153464579135, "grad_norm": 0.45275288820266724, "learning_rate": 9.851151141086049e-06, "loss": 0.3736, "step": 1097 }, { "epoch": 0.510618508758332, "grad_norm": 0.6101323962211609, "learning_rate": 9.850495094904639e-06, "loss": 0.3844, "step": 1098 }, { "epoch": 0.511083552937529, "grad_norm": 0.5054144859313965, "learning_rate": 9.849837628092003e-06, "loss": 0.4187, "step": 1099 }, { "epoch": 0.5115485971167261, "grad_norm": 0.4948999285697937, "learning_rate": 9.849178740840701e-06, "loss": 0.3826, "step": 1100 }, { "epoch": 0.5120136412959231, "grad_norm": 0.6334494948387146, "learning_rate": 9.848518433343714e-06, "loss": 0.3986, "step": 1101 }, { "epoch": 0.5124786854751201, "grad_norm": 0.5066748857498169, "learning_rate": 9.847856705794432e-06, "loss": 0.4185, "step": 1102 }, { "epoch": 0.5129437296543171, "grad_norm": 0.5187157392501831, "learning_rate": 9.847193558386666e-06, "loss": 0.385, "step": 1103 }, { "epoch": 0.5134087738335141, "grad_norm": 0.5060462951660156, "learning_rate": 9.846528991314638e-06, "loss": 0.415, "step": 1104 }, { "epoch": 0.5138738180127113, "grad_norm": 0.418716162443161, "learning_rate": 9.845863004772994e-06, "loss": 0.3735, "step": 1105 }, { "epoch": 0.5143388621919083, "grad_norm": 0.45739948749542236, "learning_rate": 9.845195598956787e-06, "loss": 0.3532, "step": 1106 }, { "epoch": 0.5148039063711053, "grad_norm": 0.42475655674934387, "learning_rate": 9.84452677406149e-06, "loss": 0.3871, "step": 1107 }, { "epoch": 0.5152689505503023, "grad_norm": 0.5018404722213745, "learning_rate": 9.843856530282992e-06, "loss": 0.396, "step": 1108 }, { "epoch": 0.5157339947294993, "grad_norm": 0.4738996624946594, "learning_rate": 9.843184867817596e-06, "loss": 0.4255, "step": 1109 }, { "epoch": 0.5161990389086963, "grad_norm": 0.47539880871772766, "learning_rate": 9.842511786862018e-06, "loss": 0.4214, "step": 1110 }, { "epoch": 0.5166640830878934, "grad_norm": 0.43585407733917236, "learning_rate": 9.841837287613399e-06, "loss": 0.3895, "step": 1111 }, { "epoch": 0.5171291272670904, "grad_norm": 0.4282119870185852, "learning_rate": 9.841161370269284e-06, "loss": 0.3917, "step": 1112 }, { "epoch": 0.5175941714462874, "grad_norm": 0.42898568511009216, "learning_rate": 9.84048403502764e-06, "loss": 0.3847, "step": 1113 }, { "epoch": 0.5180592156254844, "grad_norm": 0.5623050928115845, "learning_rate": 9.839805282086844e-06, "loss": 0.415, "step": 1114 }, { "epoch": 0.5185242598046814, "grad_norm": 0.4873126447200775, "learning_rate": 9.839125111645699e-06, "loss": 0.4168, "step": 1115 }, { "epoch": 0.5189893039838784, "grad_norm": 0.4611026644706726, "learning_rate": 9.83844352390341e-06, "loss": 0.3892, "step": 1116 }, { "epoch": 0.5194543481630755, "grad_norm": 0.5437391996383667, "learning_rate": 9.837760519059603e-06, "loss": 0.4169, "step": 1117 }, { "epoch": 0.5199193923422725, "grad_norm": 0.47111251950263977, "learning_rate": 9.83707609731432e-06, "loss": 0.398, "step": 1118 }, { "epoch": 0.5203844365214695, "grad_norm": 0.4614557921886444, "learning_rate": 9.83639025886802e-06, "loss": 0.3906, "step": 1119 }, { "epoch": 0.5208494807006666, "grad_norm": 0.5393845438957214, "learning_rate": 9.835703003921569e-06, "loss": 0.3886, "step": 1120 }, { "epoch": 0.5213145248798636, "grad_norm": 0.4539320468902588, "learning_rate": 9.835014332676256e-06, "loss": 0.4078, "step": 1121 }, { "epoch": 0.5217795690590606, "grad_norm": 0.5154919028282166, "learning_rate": 9.834324245333782e-06, "loss": 0.3883, "step": 1122 }, { "epoch": 0.5222446132382577, "grad_norm": 0.5002031326293945, "learning_rate": 9.833632742096259e-06, "loss": 0.3834, "step": 1123 }, { "epoch": 0.5227096574174547, "grad_norm": 0.551112949848175, "learning_rate": 9.83293982316622e-06, "loss": 0.4026, "step": 1124 }, { "epoch": 0.5231747015966517, "grad_norm": 0.5185533761978149, "learning_rate": 9.832245488746612e-06, "loss": 0.3856, "step": 1125 }, { "epoch": 0.5236397457758487, "grad_norm": 0.4748762249946594, "learning_rate": 9.831549739040788e-06, "loss": 0.3975, "step": 1126 }, { "epoch": 0.5241047899550457, "grad_norm": 0.5362114906311035, "learning_rate": 9.830852574252525e-06, "loss": 0.4028, "step": 1127 }, { "epoch": 0.5245698341342427, "grad_norm": 0.47415444254875183, "learning_rate": 9.830153994586013e-06, "loss": 0.3969, "step": 1128 }, { "epoch": 0.5250348783134398, "grad_norm": 0.6344930529594421, "learning_rate": 9.829454000245854e-06, "loss": 0.4292, "step": 1129 }, { "epoch": 0.5254999224926368, "grad_norm": 0.48250138759613037, "learning_rate": 9.82875259143706e-06, "loss": 0.3995, "step": 1130 }, { "epoch": 0.5259649666718338, "grad_norm": 0.5570687055587769, "learning_rate": 9.82804976836507e-06, "loss": 0.3855, "step": 1131 }, { "epoch": 0.5264300108510308, "grad_norm": 0.554617166519165, "learning_rate": 9.827345531235722e-06, "loss": 0.4045, "step": 1132 }, { "epoch": 0.5268950550302278, "grad_norm": 0.544276773929596, "learning_rate": 9.826639880255282e-06, "loss": 0.4065, "step": 1133 }, { "epoch": 0.527360099209425, "grad_norm": 0.5573837161064148, "learning_rate": 9.825932815630418e-06, "loss": 0.4231, "step": 1134 }, { "epoch": 0.527825143388622, "grad_norm": 0.4598175883293152, "learning_rate": 9.825224337568224e-06, "loss": 0.4033, "step": 1135 }, { "epoch": 0.528290187567819, "grad_norm": 0.5561431050300598, "learning_rate": 9.824514446276197e-06, "loss": 0.3891, "step": 1136 }, { "epoch": 0.528755231747016, "grad_norm": 0.46380776166915894, "learning_rate": 9.823803141962253e-06, "loss": 0.3938, "step": 1137 }, { "epoch": 0.529220275926213, "grad_norm": 0.5651960968971252, "learning_rate": 9.823090424834725e-06, "loss": 0.3797, "step": 1138 }, { "epoch": 0.52968532010541, "grad_norm": 0.5517980456352234, "learning_rate": 9.822376295102352e-06, "loss": 0.3812, "step": 1139 }, { "epoch": 0.530150364284607, "grad_norm": 0.5638952851295471, "learning_rate": 9.821660752974294e-06, "loss": 0.3835, "step": 1140 }, { "epoch": 0.5306154084638041, "grad_norm": 0.5966677665710449, "learning_rate": 9.82094379866012e-06, "loss": 0.4018, "step": 1141 }, { "epoch": 0.5310804526430011, "grad_norm": 0.5064060091972351, "learning_rate": 9.820225432369814e-06, "loss": 0.3956, "step": 1142 }, { "epoch": 0.5315454968221981, "grad_norm": 0.5523810386657715, "learning_rate": 9.819505654313775e-06, "loss": 0.3872, "step": 1143 }, { "epoch": 0.5320105410013951, "grad_norm": 0.5287060141563416, "learning_rate": 9.818784464702813e-06, "loss": 0.4018, "step": 1144 }, { "epoch": 0.5324755851805921, "grad_norm": 0.5854330062866211, "learning_rate": 9.818061863748153e-06, "loss": 0.4165, "step": 1145 }, { "epoch": 0.5329406293597891, "grad_norm": 0.4986901581287384, "learning_rate": 9.817337851661436e-06, "loss": 0.3944, "step": 1146 }, { "epoch": 0.5334056735389862, "grad_norm": 0.51901775598526, "learning_rate": 9.81661242865471e-06, "loss": 0.4012, "step": 1147 }, { "epoch": 0.5338707177181832, "grad_norm": 0.4892278015613556, "learning_rate": 9.815885594940442e-06, "loss": 0.3884, "step": 1148 }, { "epoch": 0.5343357618973803, "grad_norm": 0.4971807599067688, "learning_rate": 9.815157350731506e-06, "loss": 0.3971, "step": 1149 }, { "epoch": 0.5348008060765773, "grad_norm": 0.49850407242774963, "learning_rate": 9.814427696241197e-06, "loss": 0.3975, "step": 1150 }, { "epoch": 0.5352658502557743, "grad_norm": 0.49632036685943604, "learning_rate": 9.813696631683216e-06, "loss": 0.3923, "step": 1151 }, { "epoch": 0.5357308944349713, "grad_norm": 0.5661031007766724, "learning_rate": 9.812964157271683e-06, "loss": 0.376, "step": 1152 }, { "epoch": 0.5361959386141684, "grad_norm": 0.48331817984580994, "learning_rate": 9.812230273221124e-06, "loss": 0.41, "step": 1153 }, { "epoch": 0.5366609827933654, "grad_norm": 0.4824008047580719, "learning_rate": 9.811494979746486e-06, "loss": 0.397, "step": 1154 }, { "epoch": 0.5371260269725624, "grad_norm": 0.47880852222442627, "learning_rate": 9.81075827706312e-06, "loss": 0.3718, "step": 1155 }, { "epoch": 0.5375910711517594, "grad_norm": 0.4963204562664032, "learning_rate": 9.810020165386797e-06, "loss": 0.4233, "step": 1156 }, { "epoch": 0.5380561153309564, "grad_norm": 0.5019689202308655, "learning_rate": 9.809280644933698e-06, "loss": 0.4058, "step": 1157 }, { "epoch": 0.5385211595101534, "grad_norm": 0.5196408033370972, "learning_rate": 9.808539715920415e-06, "loss": 0.4112, "step": 1158 }, { "epoch": 0.5389862036893505, "grad_norm": 0.48643895983695984, "learning_rate": 9.807797378563957e-06, "loss": 0.397, "step": 1159 }, { "epoch": 0.5394512478685475, "grad_norm": 0.49545997381210327, "learning_rate": 9.80705363308174e-06, "loss": 0.3896, "step": 1160 }, { "epoch": 0.5399162920477445, "grad_norm": 0.5133934617042542, "learning_rate": 9.806308479691595e-06, "loss": 0.3979, "step": 1161 }, { "epoch": 0.5403813362269415, "grad_norm": 0.47291284799575806, "learning_rate": 9.805561918611766e-06, "loss": 0.3839, "step": 1162 }, { "epoch": 0.5408463804061386, "grad_norm": 0.4970063865184784, "learning_rate": 9.804813950060909e-06, "loss": 0.383, "step": 1163 }, { "epoch": 0.5413114245853357, "grad_norm": 0.46584850549697876, "learning_rate": 9.804064574258092e-06, "loss": 0.3885, "step": 1164 }, { "epoch": 0.5417764687645327, "grad_norm": 0.5352959632873535, "learning_rate": 9.803313791422793e-06, "loss": 0.4041, "step": 1165 }, { "epoch": 0.5422415129437297, "grad_norm": 0.5488241910934448, "learning_rate": 9.802561601774905e-06, "loss": 0.3859, "step": 1166 }, { "epoch": 0.5427065571229267, "grad_norm": 0.45431554317474365, "learning_rate": 9.801808005534734e-06, "loss": 0.4022, "step": 1167 }, { "epoch": 0.5431716013021237, "grad_norm": 0.5047218203544617, "learning_rate": 9.801053002922994e-06, "loss": 0.3866, "step": 1168 }, { "epoch": 0.5436366454813207, "grad_norm": 0.5306795239448547, "learning_rate": 9.800296594160814e-06, "loss": 0.3917, "step": 1169 }, { "epoch": 0.5441016896605178, "grad_norm": 0.48350954055786133, "learning_rate": 9.799538779469734e-06, "loss": 0.4058, "step": 1170 }, { "epoch": 0.5445667338397148, "grad_norm": 0.5609748959541321, "learning_rate": 9.798779559071706e-06, "loss": 0.3784, "step": 1171 }, { "epoch": 0.5450317780189118, "grad_norm": 0.4680517315864563, "learning_rate": 9.798018933189089e-06, "loss": 0.4218, "step": 1172 }, { "epoch": 0.5454968221981088, "grad_norm": 0.4936869442462921, "learning_rate": 9.797256902044666e-06, "loss": 0.3879, "step": 1173 }, { "epoch": 0.5459618663773058, "grad_norm": 0.48776108026504517, "learning_rate": 9.796493465861613e-06, "loss": 0.4006, "step": 1174 }, { "epoch": 0.5464269105565028, "grad_norm": 0.49075883626937866, "learning_rate": 9.795728624863539e-06, "loss": 0.4062, "step": 1175 }, { "epoch": 0.5468919547356998, "grad_norm": 0.4694967269897461, "learning_rate": 9.794962379274448e-06, "loss": 0.3955, "step": 1176 }, { "epoch": 0.5473569989148969, "grad_norm": 0.5107150077819824, "learning_rate": 9.79419472931876e-06, "loss": 0.4148, "step": 1177 }, { "epoch": 0.547822043094094, "grad_norm": 0.4907507002353668, "learning_rate": 9.793425675221308e-06, "loss": 0.3937, "step": 1178 }, { "epoch": 0.548287087273291, "grad_norm": 0.6052386164665222, "learning_rate": 9.79265521720734e-06, "loss": 0.3849, "step": 1179 }, { "epoch": 0.548752131452488, "grad_norm": 0.5238395929336548, "learning_rate": 9.791883355502503e-06, "loss": 0.4057, "step": 1180 }, { "epoch": 0.549217175631685, "grad_norm": 0.5540690422058105, "learning_rate": 9.791110090332866e-06, "loss": 0.3914, "step": 1181 }, { "epoch": 0.5496822198108821, "grad_norm": 0.5288317203521729, "learning_rate": 9.79033542192491e-06, "loss": 0.3932, "step": 1182 }, { "epoch": 0.5501472639900791, "grad_norm": 0.5038856863975525, "learning_rate": 9.789559350505515e-06, "loss": 0.398, "step": 1183 }, { "epoch": 0.5506123081692761, "grad_norm": 0.45208224654197693, "learning_rate": 9.788781876301988e-06, "loss": 0.4266, "step": 1184 }, { "epoch": 0.5510773523484731, "grad_norm": 0.5108579993247986, "learning_rate": 9.78800299954203e-06, "loss": 0.4126, "step": 1185 }, { "epoch": 0.5515423965276701, "grad_norm": 0.45797398686408997, "learning_rate": 9.787222720453769e-06, "loss": 0.4055, "step": 1186 }, { "epoch": 0.5520074407068671, "grad_norm": 0.43965256214141846, "learning_rate": 9.78644103926573e-06, "loss": 0.3847, "step": 1187 }, { "epoch": 0.5524724848860642, "grad_norm": 0.5082615613937378, "learning_rate": 9.78565795620686e-06, "loss": 0.3791, "step": 1188 }, { "epoch": 0.5529375290652612, "grad_norm": 0.5475904941558838, "learning_rate": 9.784873471506509e-06, "loss": 0.379, "step": 1189 }, { "epoch": 0.5534025732444582, "grad_norm": 0.5068783760070801, "learning_rate": 9.784087585394437e-06, "loss": 0.422, "step": 1190 }, { "epoch": 0.5538676174236552, "grad_norm": 0.48961615562438965, "learning_rate": 9.783300298100822e-06, "loss": 0.4116, "step": 1191 }, { "epoch": 0.5543326616028522, "grad_norm": 0.46741366386413574, "learning_rate": 9.782511609856244e-06, "loss": 0.4024, "step": 1192 }, { "epoch": 0.5547977057820493, "grad_norm": 0.4937583804130554, "learning_rate": 9.7817215208917e-06, "loss": 0.3889, "step": 1193 }, { "epoch": 0.5552627499612464, "grad_norm": 0.5000990033149719, "learning_rate": 9.780930031438594e-06, "loss": 0.4062, "step": 1194 }, { "epoch": 0.5557277941404434, "grad_norm": 0.5129031538963318, "learning_rate": 9.780137141728737e-06, "loss": 0.3849, "step": 1195 }, { "epoch": 0.5561928383196404, "grad_norm": 0.46762484312057495, "learning_rate": 9.779342851994356e-06, "loss": 0.3877, "step": 1196 }, { "epoch": 0.5566578824988374, "grad_norm": 0.4732123911380768, "learning_rate": 9.778547162468087e-06, "loss": 0.3746, "step": 1197 }, { "epoch": 0.5571229266780344, "grad_norm": 0.44343671202659607, "learning_rate": 9.77775007338297e-06, "loss": 0.3878, "step": 1198 }, { "epoch": 0.5575879708572314, "grad_norm": 0.45024973154067993, "learning_rate": 9.776951584972464e-06, "loss": 0.3907, "step": 1199 }, { "epoch": 0.5580530150364285, "grad_norm": 0.4816182255744934, "learning_rate": 9.776151697470431e-06, "loss": 0.4158, "step": 1200 }, { "epoch": 0.5585180592156255, "grad_norm": 0.4887128472328186, "learning_rate": 9.775350411111145e-06, "loss": 0.4127, "step": 1201 }, { "epoch": 0.5589831033948225, "grad_norm": 0.520602285861969, "learning_rate": 9.77454772612929e-06, "loss": 0.4024, "step": 1202 }, { "epoch": 0.5594481475740195, "grad_norm": 0.4444476366043091, "learning_rate": 9.773743642759961e-06, "loss": 0.4007, "step": 1203 }, { "epoch": 0.5599131917532165, "grad_norm": 0.5314255356788635, "learning_rate": 9.77293816123866e-06, "loss": 0.4009, "step": 1204 }, { "epoch": 0.5603782359324135, "grad_norm": 0.5080925226211548, "learning_rate": 9.7721312818013e-06, "loss": 0.3741, "step": 1205 }, { "epoch": 0.5608432801116106, "grad_norm": 0.4945761263370514, "learning_rate": 9.7713230046842e-06, "loss": 0.4204, "step": 1206 }, { "epoch": 0.5613083242908077, "grad_norm": 0.5242900848388672, "learning_rate": 9.770513330124094e-06, "loss": 0.3796, "step": 1207 }, { "epoch": 0.5617733684700047, "grad_norm": 0.48071667551994324, "learning_rate": 9.769702258358123e-06, "loss": 0.4133, "step": 1208 }, { "epoch": 0.5622384126492017, "grad_norm": 0.497071772813797, "learning_rate": 9.768889789623833e-06, "loss": 0.3845, "step": 1209 }, { "epoch": 0.5627034568283987, "grad_norm": 0.5164023041725159, "learning_rate": 9.768075924159185e-06, "loss": 0.3954, "step": 1210 }, { "epoch": 0.5631685010075957, "grad_norm": 0.46989476680755615, "learning_rate": 9.767260662202551e-06, "loss": 0.4029, "step": 1211 }, { "epoch": 0.5636335451867928, "grad_norm": 0.5252077579498291, "learning_rate": 9.766444003992704e-06, "loss": 0.3907, "step": 1212 }, { "epoch": 0.5640985893659898, "grad_norm": 0.5021181106567383, "learning_rate": 9.765625949768828e-06, "loss": 0.3868, "step": 1213 }, { "epoch": 0.5645636335451868, "grad_norm": 0.4650791585445404, "learning_rate": 9.764806499770521e-06, "loss": 0.3805, "step": 1214 }, { "epoch": 0.5650286777243838, "grad_norm": 0.47574383020401, "learning_rate": 9.763985654237785e-06, "loss": 0.3812, "step": 1215 }, { "epoch": 0.5654937219035808, "grad_norm": 0.44989126920700073, "learning_rate": 9.763163413411034e-06, "loss": 0.3736, "step": 1216 }, { "epoch": 0.5659587660827778, "grad_norm": 0.5040990710258484, "learning_rate": 9.762339777531088e-06, "loss": 0.3744, "step": 1217 }, { "epoch": 0.5664238102619749, "grad_norm": 0.5459234118461609, "learning_rate": 9.761514746839176e-06, "loss": 0.3916, "step": 1218 }, { "epoch": 0.5668888544411719, "grad_norm": 0.48448291420936584, "learning_rate": 9.760688321576938e-06, "loss": 0.4425, "step": 1219 }, { "epoch": 0.5673538986203689, "grad_norm": 0.5275092720985413, "learning_rate": 9.759860501986417e-06, "loss": 0.3998, "step": 1220 }, { "epoch": 0.5678189427995659, "grad_norm": 0.6015114188194275, "learning_rate": 9.759031288310072e-06, "loss": 0.4155, "step": 1221 }, { "epoch": 0.568283986978763, "grad_norm": 0.47165900468826294, "learning_rate": 9.758200680790764e-06, "loss": 0.387, "step": 1222 }, { "epoch": 0.56874903115796, "grad_norm": 0.592989444732666, "learning_rate": 9.757368679671764e-06, "loss": 0.3973, "step": 1223 }, { "epoch": 0.5692140753371571, "grad_norm": 0.4669657349586487, "learning_rate": 9.756535285196754e-06, "loss": 0.3762, "step": 1224 }, { "epoch": 0.5696791195163541, "grad_norm": 0.48349782824516296, "learning_rate": 9.755700497609819e-06, "loss": 0.4043, "step": 1225 }, { "epoch": 0.5701441636955511, "grad_norm": 0.5364663600921631, "learning_rate": 9.754864317155455e-06, "loss": 0.4216, "step": 1226 }, { "epoch": 0.5706092078747481, "grad_norm": 0.5064231753349304, "learning_rate": 9.754026744078569e-06, "loss": 0.4004, "step": 1227 }, { "epoch": 0.5710742520539451, "grad_norm": 0.48567453026771545, "learning_rate": 9.753187778624467e-06, "loss": 0.3985, "step": 1228 }, { "epoch": 0.5715392962331421, "grad_norm": 0.5714061856269836, "learning_rate": 9.752347421038873e-06, "loss": 0.3836, "step": 1229 }, { "epoch": 0.5720043404123392, "grad_norm": 0.4642956256866455, "learning_rate": 9.751505671567914e-06, "loss": 0.3919, "step": 1230 }, { "epoch": 0.5724693845915362, "grad_norm": 0.4755660891532898, "learning_rate": 9.750662530458121e-06, "loss": 0.3881, "step": 1231 }, { "epoch": 0.5729344287707332, "grad_norm": 0.48824259638786316, "learning_rate": 9.749817997956438e-06, "loss": 0.3871, "step": 1232 }, { "epoch": 0.5733994729499302, "grad_norm": 0.49661746621131897, "learning_rate": 9.748972074310217e-06, "loss": 0.3709, "step": 1233 }, { "epoch": 0.5738645171291272, "grad_norm": 0.4752178490161896, "learning_rate": 9.748124759767215e-06, "loss": 0.3907, "step": 1234 }, { "epoch": 0.5743295613083242, "grad_norm": 0.47395604848861694, "learning_rate": 9.747276054575593e-06, "loss": 0.416, "step": 1235 }, { "epoch": 0.5747946054875213, "grad_norm": 0.4589254558086395, "learning_rate": 9.746425958983925e-06, "loss": 0.4059, "step": 1236 }, { "epoch": 0.5752596496667184, "grad_norm": 0.42140620946884155, "learning_rate": 9.745574473241193e-06, "loss": 0.3705, "step": 1237 }, { "epoch": 0.5757246938459154, "grad_norm": 0.45815083384513855, "learning_rate": 9.744721597596778e-06, "loss": 0.4068, "step": 1238 }, { "epoch": 0.5761897380251124, "grad_norm": 0.45956507325172424, "learning_rate": 9.743867332300478e-06, "loss": 0.3637, "step": 1239 }, { "epoch": 0.5766547822043094, "grad_norm": 0.49071380496025085, "learning_rate": 9.743011677602493e-06, "loss": 0.375, "step": 1240 }, { "epoch": 0.5771198263835065, "grad_norm": 0.4949245750904083, "learning_rate": 9.742154633753428e-06, "loss": 0.378, "step": 1241 }, { "epoch": 0.5775848705627035, "grad_norm": 0.5535846948623657, "learning_rate": 9.741296201004298e-06, "loss": 0.3901, "step": 1242 }, { "epoch": 0.5780499147419005, "grad_norm": 0.44817158579826355, "learning_rate": 9.740436379606524e-06, "loss": 0.4016, "step": 1243 }, { "epoch": 0.5785149589210975, "grad_norm": 0.5253201723098755, "learning_rate": 9.739575169811934e-06, "loss": 0.38, "step": 1244 }, { "epoch": 0.5789800031002945, "grad_norm": 0.6024049520492554, "learning_rate": 9.738712571872765e-06, "loss": 0.3948, "step": 1245 }, { "epoch": 0.5794450472794915, "grad_norm": 0.41917502880096436, "learning_rate": 9.737848586041652e-06, "loss": 0.3869, "step": 1246 }, { "epoch": 0.5799100914586885, "grad_norm": 0.5947693586349487, "learning_rate": 9.736983212571646e-06, "loss": 0.3991, "step": 1247 }, { "epoch": 0.5803751356378856, "grad_norm": 0.6152756810188293, "learning_rate": 9.736116451716203e-06, "loss": 0.4075, "step": 1248 }, { "epoch": 0.5808401798170826, "grad_norm": 0.48533540964126587, "learning_rate": 9.735248303729178e-06, "loss": 0.3881, "step": 1249 }, { "epoch": 0.5813052239962796, "grad_norm": 0.6273863315582275, "learning_rate": 9.734378768864843e-06, "loss": 0.3723, "step": 1250 }, { "epoch": 0.5817702681754767, "grad_norm": 0.5250769257545471, "learning_rate": 9.733507847377866e-06, "loss": 0.3853, "step": 1251 }, { "epoch": 0.5822353123546737, "grad_norm": 0.49252286553382874, "learning_rate": 9.73263553952333e-06, "loss": 0.3889, "step": 1252 }, { "epoch": 0.5827003565338708, "grad_norm": 0.5308325886726379, "learning_rate": 9.731761845556713e-06, "loss": 0.3936, "step": 1253 }, { "epoch": 0.5831654007130678, "grad_norm": 0.5030363202095032, "learning_rate": 9.730886765733914e-06, "loss": 0.3786, "step": 1254 }, { "epoch": 0.5836304448922648, "grad_norm": 0.46710026264190674, "learning_rate": 9.730010300311226e-06, "loss": 0.4055, "step": 1255 }, { "epoch": 0.5840954890714618, "grad_norm": 0.5640740394592285, "learning_rate": 9.72913244954535e-06, "loss": 0.4313, "step": 1256 }, { "epoch": 0.5845605332506588, "grad_norm": 0.43550801277160645, "learning_rate": 9.728253213693395e-06, "loss": 0.3745, "step": 1257 }, { "epoch": 0.5850255774298558, "grad_norm": 0.5071004629135132, "learning_rate": 9.727372593012875e-06, "loss": 0.4029, "step": 1258 }, { "epoch": 0.5854906216090529, "grad_norm": 0.5142107605934143, "learning_rate": 9.72649058776171e-06, "loss": 0.3912, "step": 1259 }, { "epoch": 0.5859556657882499, "grad_norm": 0.4210042655467987, "learning_rate": 9.725607198198227e-06, "loss": 0.3884, "step": 1260 }, { "epoch": 0.5864207099674469, "grad_norm": 0.47974395751953125, "learning_rate": 9.724722424581154e-06, "loss": 0.3834, "step": 1261 }, { "epoch": 0.5868857541466439, "grad_norm": 0.49919867515563965, "learning_rate": 9.723836267169626e-06, "loss": 0.394, "step": 1262 }, { "epoch": 0.5873507983258409, "grad_norm": 0.46105697751045227, "learning_rate": 9.722948726223185e-06, "loss": 0.383, "step": 1263 }, { "epoch": 0.5878158425050379, "grad_norm": 0.5934450030326843, "learning_rate": 9.72205980200178e-06, "loss": 0.3716, "step": 1264 }, { "epoch": 0.588280886684235, "grad_norm": 0.5127496719360352, "learning_rate": 9.72116949476576e-06, "loss": 0.3684, "step": 1265 }, { "epoch": 0.5887459308634321, "grad_norm": 0.4598689675331116, "learning_rate": 9.720277804775879e-06, "loss": 0.4068, "step": 1266 }, { "epoch": 0.5892109750426291, "grad_norm": 0.5856637358665466, "learning_rate": 9.719384732293302e-06, "loss": 0.3637, "step": 1267 }, { "epoch": 0.5896760192218261, "grad_norm": 0.5395975708961487, "learning_rate": 9.718490277579595e-06, "loss": 0.3902, "step": 1268 }, { "epoch": 0.5901410634010231, "grad_norm": 0.46271106600761414, "learning_rate": 9.71759444089673e-06, "loss": 0.374, "step": 1269 }, { "epoch": 0.5906061075802201, "grad_norm": 0.5440768003463745, "learning_rate": 9.716697222507081e-06, "loss": 0.3957, "step": 1270 }, { "epoch": 0.5910711517594172, "grad_norm": 0.5980409979820251, "learning_rate": 9.715798622673429e-06, "loss": 0.3879, "step": 1271 }, { "epoch": 0.5915361959386142, "grad_norm": 0.5140041708946228, "learning_rate": 9.71489864165896e-06, "loss": 0.3859, "step": 1272 }, { "epoch": 0.5920012401178112, "grad_norm": 0.6424972414970398, "learning_rate": 9.713997279727265e-06, "loss": 0.4169, "step": 1273 }, { "epoch": 0.5924662842970082, "grad_norm": 0.5433067679405212, "learning_rate": 9.713094537142336e-06, "loss": 0.3957, "step": 1274 }, { "epoch": 0.5929313284762052, "grad_norm": 0.5382039546966553, "learning_rate": 9.712190414168573e-06, "loss": 0.3878, "step": 1275 }, { "epoch": 0.5933963726554022, "grad_norm": 0.5087778568267822, "learning_rate": 9.711284911070777e-06, "loss": 0.4115, "step": 1276 }, { "epoch": 0.5938614168345993, "grad_norm": 0.5524824261665344, "learning_rate": 9.71037802811416e-06, "loss": 0.3898, "step": 1277 }, { "epoch": 0.5943264610137963, "grad_norm": 0.579130232334137, "learning_rate": 9.709469765564328e-06, "loss": 0.3868, "step": 1278 }, { "epoch": 0.5947915051929933, "grad_norm": 0.5298312306404114, "learning_rate": 9.708560123687298e-06, "loss": 0.4156, "step": 1279 }, { "epoch": 0.5952565493721904, "grad_norm": 0.5209439396858215, "learning_rate": 9.707649102749488e-06, "loss": 0.384, "step": 1280 }, { "epoch": 0.5957215935513874, "grad_norm": 0.5347442030906677, "learning_rate": 9.706736703017725e-06, "loss": 0.3886, "step": 1281 }, { "epoch": 0.5961866377305844, "grad_norm": 0.5219368934631348, "learning_rate": 9.705822924759235e-06, "loss": 0.388, "step": 1282 }, { "epoch": 0.5966516819097815, "grad_norm": 0.618898868560791, "learning_rate": 9.704907768241648e-06, "loss": 0.3944, "step": 1283 }, { "epoch": 0.5971167260889785, "grad_norm": 0.5122677087783813, "learning_rate": 9.703991233732995e-06, "loss": 0.3902, "step": 1284 }, { "epoch": 0.5975817702681755, "grad_norm": 0.5251182317733765, "learning_rate": 9.70307332150172e-06, "loss": 0.4144, "step": 1285 }, { "epoch": 0.5980468144473725, "grad_norm": 0.5196467041969299, "learning_rate": 9.702154031816659e-06, "loss": 0.3837, "step": 1286 }, { "epoch": 0.5985118586265695, "grad_norm": 0.5128290057182312, "learning_rate": 9.701233364947062e-06, "loss": 0.3989, "step": 1287 }, { "epoch": 0.5989769028057665, "grad_norm": 0.5336976051330566, "learning_rate": 9.700311321162577e-06, "loss": 0.3915, "step": 1288 }, { "epoch": 0.5994419469849636, "grad_norm": 0.6457362771034241, "learning_rate": 9.69938790073325e-06, "loss": 0.3883, "step": 1289 }, { "epoch": 0.5999069911641606, "grad_norm": 0.5426515340805054, "learning_rate": 9.698463103929542e-06, "loss": 0.4055, "step": 1290 }, { "epoch": 0.6003720353433576, "grad_norm": 0.49356475472450256, "learning_rate": 9.697536931022308e-06, "loss": 0.3999, "step": 1291 }, { "epoch": 0.6008370795225546, "grad_norm": 0.5430606007575989, "learning_rate": 9.69660938228281e-06, "loss": 0.3807, "step": 1292 }, { "epoch": 0.6013021237017516, "grad_norm": 0.5589854121208191, "learning_rate": 9.695680457982713e-06, "loss": 0.4153, "step": 1293 }, { "epoch": 0.6017671678809486, "grad_norm": 0.6247249841690063, "learning_rate": 9.694750158394081e-06, "loss": 0.4118, "step": 1294 }, { "epoch": 0.6022322120601458, "grad_norm": 0.6207286715507507, "learning_rate": 9.693818483789386e-06, "loss": 0.3833, "step": 1295 }, { "epoch": 0.6026972562393428, "grad_norm": 0.5082651376724243, "learning_rate": 9.692885434441498e-06, "loss": 0.4056, "step": 1296 }, { "epoch": 0.6031623004185398, "grad_norm": 0.5024476051330566, "learning_rate": 9.691951010623696e-06, "loss": 0.3943, "step": 1297 }, { "epoch": 0.6036273445977368, "grad_norm": 0.5954005718231201, "learning_rate": 9.691015212609654e-06, "loss": 0.3939, "step": 1298 }, { "epoch": 0.6040923887769338, "grad_norm": 0.6106828451156616, "learning_rate": 9.690078040673454e-06, "loss": 0.387, "step": 1299 }, { "epoch": 0.6045574329561308, "grad_norm": 0.5513212084770203, "learning_rate": 9.689139495089575e-06, "loss": 0.3904, "step": 1300 }, { "epoch": 0.6050224771353279, "grad_norm": 0.6389715671539307, "learning_rate": 9.688199576132905e-06, "loss": 0.3904, "step": 1301 }, { "epoch": 0.6054875213145249, "grad_norm": 0.4722382426261902, "learning_rate": 9.687258284078733e-06, "loss": 0.393, "step": 1302 }, { "epoch": 0.6059525654937219, "grad_norm": 0.5727136731147766, "learning_rate": 9.686315619202743e-06, "loss": 0.394, "step": 1303 }, { "epoch": 0.6064176096729189, "grad_norm": 0.6134669780731201, "learning_rate": 9.685371581781029e-06, "loss": 0.3903, "step": 1304 }, { "epoch": 0.6068826538521159, "grad_norm": 0.5710572600364685, "learning_rate": 9.684426172090084e-06, "loss": 0.4115, "step": 1305 }, { "epoch": 0.6073476980313129, "grad_norm": 0.5541101098060608, "learning_rate": 9.683479390406803e-06, "loss": 0.3935, "step": 1306 }, { "epoch": 0.60781274221051, "grad_norm": 0.5224672555923462, "learning_rate": 9.682531237008483e-06, "loss": 0.3989, "step": 1307 }, { "epoch": 0.608277786389707, "grad_norm": 0.5431579351425171, "learning_rate": 9.681581712172824e-06, "loss": 0.3995, "step": 1308 }, { "epoch": 0.608742830568904, "grad_norm": 0.5996741056442261, "learning_rate": 9.680630816177924e-06, "loss": 0.4012, "step": 1309 }, { "epoch": 0.6092078747481011, "grad_norm": 0.5371108055114746, "learning_rate": 9.679678549302287e-06, "loss": 0.404, "step": 1310 }, { "epoch": 0.6096729189272981, "grad_norm": 0.4972769320011139, "learning_rate": 9.678724911824815e-06, "loss": 0.3939, "step": 1311 }, { "epoch": 0.6101379631064952, "grad_norm": 0.7065192461013794, "learning_rate": 9.677769904024815e-06, "loss": 0.3968, "step": 1312 }, { "epoch": 0.6106030072856922, "grad_norm": 0.5725729465484619, "learning_rate": 9.676813526181989e-06, "loss": 0.4079, "step": 1313 }, { "epoch": 0.6110680514648892, "grad_norm": 0.5286259651184082, "learning_rate": 9.675855778576448e-06, "loss": 0.3818, "step": 1314 }, { "epoch": 0.6115330956440862, "grad_norm": 0.4823831617832184, "learning_rate": 9.674896661488702e-06, "loss": 0.3856, "step": 1315 }, { "epoch": 0.6119981398232832, "grad_norm": 0.5325484871864319, "learning_rate": 9.673936175199657e-06, "loss": 0.3887, "step": 1316 }, { "epoch": 0.6124631840024802, "grad_norm": 0.5285559296607971, "learning_rate": 9.672974319990627e-06, "loss": 0.4181, "step": 1317 }, { "epoch": 0.6129282281816772, "grad_norm": 0.5194781422615051, "learning_rate": 9.672011096143323e-06, "loss": 0.4084, "step": 1318 }, { "epoch": 0.6133932723608743, "grad_norm": 0.5809126496315002, "learning_rate": 9.671046503939857e-06, "loss": 0.3985, "step": 1319 }, { "epoch": 0.6138583165400713, "grad_norm": 0.443920373916626, "learning_rate": 9.670080543662742e-06, "loss": 0.386, "step": 1320 }, { "epoch": 0.6143233607192683, "grad_norm": 0.5228665471076965, "learning_rate": 9.669113215594892e-06, "loss": 0.391, "step": 1321 }, { "epoch": 0.6147884048984653, "grad_norm": 0.504928469657898, "learning_rate": 9.668144520019622e-06, "loss": 0.402, "step": 1322 }, { "epoch": 0.6152534490776623, "grad_norm": 0.5686684250831604, "learning_rate": 9.667174457220648e-06, "loss": 0.3837, "step": 1323 }, { "epoch": 0.6157184932568595, "grad_norm": 0.5331105589866638, "learning_rate": 9.666203027482086e-06, "loss": 0.3919, "step": 1324 }, { "epoch": 0.6161835374360565, "grad_norm": 0.5707592368125916, "learning_rate": 9.665230231088451e-06, "loss": 0.3985, "step": 1325 }, { "epoch": 0.6166485816152535, "grad_norm": 0.5226622223854065, "learning_rate": 9.664256068324657e-06, "loss": 0.399, "step": 1326 }, { "epoch": 0.6171136257944505, "grad_norm": 0.5462534427642822, "learning_rate": 9.663280539476026e-06, "loss": 0.399, "step": 1327 }, { "epoch": 0.6175786699736475, "grad_norm": 0.4789917469024658, "learning_rate": 9.66230364482827e-06, "loss": 0.3907, "step": 1328 }, { "epoch": 0.6180437141528445, "grad_norm": 0.45184510946273804, "learning_rate": 9.661325384667508e-06, "loss": 0.3753, "step": 1329 }, { "epoch": 0.6185087583320416, "grad_norm": 0.5181789398193359, "learning_rate": 9.660345759280254e-06, "loss": 0.3864, "step": 1330 }, { "epoch": 0.6189738025112386, "grad_norm": 0.504653811454773, "learning_rate": 9.659364768953426e-06, "loss": 0.3747, "step": 1331 }, { "epoch": 0.6194388466904356, "grad_norm": 0.47670918703079224, "learning_rate": 9.65838241397434e-06, "loss": 0.3764, "step": 1332 }, { "epoch": 0.6199038908696326, "grad_norm": 0.5914029479026794, "learning_rate": 9.657398694630713e-06, "loss": 0.4123, "step": 1333 }, { "epoch": 0.6203689350488296, "grad_norm": 0.443935751914978, "learning_rate": 9.656413611210657e-06, "loss": 0.4088, "step": 1334 }, { "epoch": 0.6208339792280266, "grad_norm": 0.4935535788536072, "learning_rate": 9.655427164002692e-06, "loss": 0.4064, "step": 1335 }, { "epoch": 0.6212990234072236, "grad_norm": 0.41665172576904297, "learning_rate": 9.654439353295728e-06, "loss": 0.3886, "step": 1336 }, { "epoch": 0.6217640675864207, "grad_norm": 0.524173378944397, "learning_rate": 9.653450179379081e-06, "loss": 0.4056, "step": 1337 }, { "epoch": 0.6222291117656177, "grad_norm": 0.497357040643692, "learning_rate": 9.65245964254246e-06, "loss": 0.4009, "step": 1338 }, { "epoch": 0.6226941559448148, "grad_norm": 0.46005746722221375, "learning_rate": 9.651467743075984e-06, "loss": 0.3666, "step": 1339 }, { "epoch": 0.6231592001240118, "grad_norm": 0.5369196534156799, "learning_rate": 9.650474481270159e-06, "loss": 0.3952, "step": 1340 }, { "epoch": 0.6236242443032088, "grad_norm": 0.5053161978721619, "learning_rate": 9.649479857415896e-06, "loss": 0.4066, "step": 1341 }, { "epoch": 0.6240892884824059, "grad_norm": 0.50250643491745, "learning_rate": 9.648483871804506e-06, "loss": 0.3863, "step": 1342 }, { "epoch": 0.6245543326616029, "grad_norm": 0.540744960308075, "learning_rate": 9.647486524727696e-06, "loss": 0.3592, "step": 1343 }, { "epoch": 0.6250193768407999, "grad_norm": 0.4549930989742279, "learning_rate": 9.646487816477575e-06, "loss": 0.4106, "step": 1344 }, { "epoch": 0.6254844210199969, "grad_norm": 0.5376303791999817, "learning_rate": 9.645487747346643e-06, "loss": 0.3856, "step": 1345 }, { "epoch": 0.6259494651991939, "grad_norm": 0.5555692911148071, "learning_rate": 9.644486317627808e-06, "loss": 0.3952, "step": 1346 }, { "epoch": 0.6264145093783909, "grad_norm": 0.48616769909858704, "learning_rate": 9.643483527614372e-06, "loss": 0.3639, "step": 1347 }, { "epoch": 0.626879553557588, "grad_norm": 0.5379546284675598, "learning_rate": 9.642479377600036e-06, "loss": 0.3779, "step": 1348 }, { "epoch": 0.627344597736785, "grad_norm": 0.5338390469551086, "learning_rate": 9.641473867878898e-06, "loss": 0.3819, "step": 1349 }, { "epoch": 0.627809641915982, "grad_norm": 0.4833146929740906, "learning_rate": 9.640466998745456e-06, "loss": 0.38, "step": 1350 }, { "epoch": 0.628274686095179, "grad_norm": 0.5547218918800354, "learning_rate": 9.639458770494608e-06, "loss": 0.3975, "step": 1351 }, { "epoch": 0.628739730274376, "grad_norm": 0.4861772060394287, "learning_rate": 9.638449183421644e-06, "loss": 0.3833, "step": 1352 }, { "epoch": 0.629204774453573, "grad_norm": 0.48040562868118286, "learning_rate": 9.637438237822256e-06, "loss": 0.3903, "step": 1353 }, { "epoch": 0.6296698186327702, "grad_norm": 0.49921414256095886, "learning_rate": 9.636425933992536e-06, "loss": 0.4055, "step": 1354 }, { "epoch": 0.6301348628119672, "grad_norm": 0.5730396509170532, "learning_rate": 9.63541227222897e-06, "loss": 0.3813, "step": 1355 }, { "epoch": 0.6305999069911642, "grad_norm": 0.49284279346466064, "learning_rate": 9.634397252828444e-06, "loss": 0.3643, "step": 1356 }, { "epoch": 0.6310649511703612, "grad_norm": 0.5882490277290344, "learning_rate": 9.63338087608824e-06, "loss": 0.3976, "step": 1357 }, { "epoch": 0.6315299953495582, "grad_norm": 0.5045767426490784, "learning_rate": 9.632363142306036e-06, "loss": 0.4051, "step": 1358 }, { "epoch": 0.6319950395287552, "grad_norm": 0.5159674882888794, "learning_rate": 9.631344051779914e-06, "loss": 0.3642, "step": 1359 }, { "epoch": 0.6324600837079523, "grad_norm": 0.6393707990646362, "learning_rate": 9.630323604808344e-06, "loss": 0.4197, "step": 1360 }, { "epoch": 0.6329251278871493, "grad_norm": 0.46538159251213074, "learning_rate": 9.629301801690205e-06, "loss": 0.3911, "step": 1361 }, { "epoch": 0.6333901720663463, "grad_norm": 0.5735601782798767, "learning_rate": 9.62827864272476e-06, "loss": 0.401, "step": 1362 }, { "epoch": 0.6338552162455433, "grad_norm": 0.44864973425865173, "learning_rate": 9.62725412821168e-06, "loss": 0.3685, "step": 1363 }, { "epoch": 0.6343202604247403, "grad_norm": 0.46898770332336426, "learning_rate": 9.626228258451027e-06, "loss": 0.3885, "step": 1364 }, { "epoch": 0.6347853046039373, "grad_norm": 0.4860677421092987, "learning_rate": 9.625201033743262e-06, "loss": 0.3992, "step": 1365 }, { "epoch": 0.6352503487831344, "grad_norm": 0.47478920221328735, "learning_rate": 9.62417245438924e-06, "loss": 0.4055, "step": 1366 }, { "epoch": 0.6357153929623314, "grad_norm": 0.5337481498718262, "learning_rate": 9.623142520690219e-06, "loss": 0.3892, "step": 1367 }, { "epoch": 0.6361804371415285, "grad_norm": 0.5208576321601868, "learning_rate": 9.622111232947847e-06, "loss": 0.3763, "step": 1368 }, { "epoch": 0.6366454813207255, "grad_norm": 0.490197092294693, "learning_rate": 9.621078591464174e-06, "loss": 0.4076, "step": 1369 }, { "epoch": 0.6371105254999225, "grad_norm": 0.5205878615379333, "learning_rate": 9.620044596541642e-06, "loss": 0.3825, "step": 1370 }, { "epoch": 0.6375755696791195, "grad_norm": 0.4583769142627716, "learning_rate": 9.61900924848309e-06, "loss": 0.3924, "step": 1371 }, { "epoch": 0.6380406138583166, "grad_norm": 0.4924391806125641, "learning_rate": 9.617972547591759e-06, "loss": 0.4019, "step": 1372 }, { "epoch": 0.6385056580375136, "grad_norm": 0.5124062895774841, "learning_rate": 9.616934494171277e-06, "loss": 0.3913, "step": 1373 }, { "epoch": 0.6389707022167106, "grad_norm": 0.5022979974746704, "learning_rate": 9.615895088525677e-06, "loss": 0.3889, "step": 1374 }, { "epoch": 0.6394357463959076, "grad_norm": 0.42661193013191223, "learning_rate": 9.614854330959382e-06, "loss": 0.3649, "step": 1375 }, { "epoch": 0.6399007905751046, "grad_norm": 0.6055207848548889, "learning_rate": 9.613812221777212e-06, "loss": 0.3975, "step": 1376 }, { "epoch": 0.6403658347543016, "grad_norm": 0.6047768592834473, "learning_rate": 9.612768761284386e-06, "loss": 0.3953, "step": 1377 }, { "epoch": 0.6408308789334987, "grad_norm": 0.4532669186592102, "learning_rate": 9.611723949786517e-06, "loss": 0.3992, "step": 1378 }, { "epoch": 0.6412959231126957, "grad_norm": 0.45877280831336975, "learning_rate": 9.610677787589611e-06, "loss": 0.4096, "step": 1379 }, { "epoch": 0.6417609672918927, "grad_norm": 0.599404513835907, "learning_rate": 9.609630275000072e-06, "loss": 0.4022, "step": 1380 }, { "epoch": 0.6422260114710897, "grad_norm": 0.5159070491790771, "learning_rate": 9.608581412324701e-06, "loss": 0.3832, "step": 1381 }, { "epoch": 0.6426910556502867, "grad_norm": 0.45905065536499023, "learning_rate": 9.607531199870692e-06, "loss": 0.3963, "step": 1382 }, { "epoch": 0.6431560998294839, "grad_norm": 0.6395869851112366, "learning_rate": 9.606479637945635e-06, "loss": 0.4031, "step": 1383 }, { "epoch": 0.6436211440086809, "grad_norm": 0.5056988000869751, "learning_rate": 9.60542672685752e-06, "loss": 0.3899, "step": 1384 }, { "epoch": 0.6440861881878779, "grad_norm": 0.56319260597229, "learning_rate": 9.604372466914717e-06, "loss": 0.388, "step": 1385 }, { "epoch": 0.6445512323670749, "grad_norm": 0.6158375144004822, "learning_rate": 9.603316858426014e-06, "loss": 0.3957, "step": 1386 }, { "epoch": 0.6450162765462719, "grad_norm": 0.4532202184200287, "learning_rate": 9.60225990170057e-06, "loss": 0.3801, "step": 1387 }, { "epoch": 0.6454813207254689, "grad_norm": 0.6414798498153687, "learning_rate": 9.60120159704796e-06, "loss": 0.3984, "step": 1388 }, { "epoch": 0.645946364904666, "grad_norm": 0.4911268651485443, "learning_rate": 9.600141944778139e-06, "loss": 0.3868, "step": 1389 }, { "epoch": 0.646411409083863, "grad_norm": 0.4856494963169098, "learning_rate": 9.599080945201462e-06, "loss": 0.421, "step": 1390 }, { "epoch": 0.64687645326306, "grad_norm": 0.49976852536201477, "learning_rate": 9.598018598628682e-06, "loss": 0.4045, "step": 1391 }, { "epoch": 0.647341497442257, "grad_norm": 0.4958864748477936, "learning_rate": 9.59695490537094e-06, "loss": 0.3759, "step": 1392 }, { "epoch": 0.647806541621454, "grad_norm": 0.4640512764453888, "learning_rate": 9.595889865739774e-06, "loss": 0.3725, "step": 1393 }, { "epoch": 0.648271585800651, "grad_norm": 0.4767007529735565, "learning_rate": 9.594823480047118e-06, "loss": 0.3764, "step": 1394 }, { "epoch": 0.648736629979848, "grad_norm": 0.509242594242096, "learning_rate": 9.5937557486053e-06, "loss": 0.374, "step": 1395 }, { "epoch": 0.6492016741590451, "grad_norm": 0.49067145586013794, "learning_rate": 9.59268667172704e-06, "loss": 0.3969, "step": 1396 }, { "epoch": 0.6496667183382422, "grad_norm": 0.5714993476867676, "learning_rate": 9.591616249725456e-06, "loss": 0.3665, "step": 1397 }, { "epoch": 0.6501317625174392, "grad_norm": 0.5996623635292053, "learning_rate": 9.590544482914052e-06, "loss": 0.3722, "step": 1398 }, { "epoch": 0.6505968066966362, "grad_norm": 0.4260701835155487, "learning_rate": 9.589471371606735e-06, "loss": 0.3707, "step": 1399 }, { "epoch": 0.6510618508758332, "grad_norm": 0.6318243741989136, "learning_rate": 9.5883969161178e-06, "loss": 0.4004, "step": 1400 }, { "epoch": 0.6515268950550303, "grad_norm": 0.5253607630729675, "learning_rate": 9.587321116761938e-06, "loss": 0.3856, "step": 1401 }, { "epoch": 0.6519919392342273, "grad_norm": 0.4526579976081848, "learning_rate": 9.586243973854234e-06, "loss": 0.4028, "step": 1402 }, { "epoch": 0.6524569834134243, "grad_norm": 0.5988115668296814, "learning_rate": 9.585165487710167e-06, "loss": 0.3829, "step": 1403 }, { "epoch": 0.6529220275926213, "grad_norm": 0.47281304001808167, "learning_rate": 9.584085658645604e-06, "loss": 0.3911, "step": 1404 }, { "epoch": 0.6533870717718183, "grad_norm": 0.48429980874061584, "learning_rate": 9.583004486976813e-06, "loss": 0.3721, "step": 1405 }, { "epoch": 0.6538521159510153, "grad_norm": 0.5094161033630371, "learning_rate": 9.58192197302045e-06, "loss": 0.3976, "step": 1406 }, { "epoch": 0.6543171601302123, "grad_norm": 0.41321924328804016, "learning_rate": 9.580838117093564e-06, "loss": 0.3871, "step": 1407 }, { "epoch": 0.6547822043094094, "grad_norm": 0.5384671092033386, "learning_rate": 9.579752919513602e-06, "loss": 0.4158, "step": 1408 }, { "epoch": 0.6552472484886064, "grad_norm": 0.5002989768981934, "learning_rate": 9.5786663805984e-06, "loss": 0.3833, "step": 1409 }, { "epoch": 0.6557122926678034, "grad_norm": 0.4137031137943268, "learning_rate": 9.577578500666187e-06, "loss": 0.3849, "step": 1410 }, { "epoch": 0.6561773368470004, "grad_norm": 0.5422747731208801, "learning_rate": 9.576489280035584e-06, "loss": 0.4073, "step": 1411 }, { "epoch": 0.6566423810261975, "grad_norm": 0.49949032068252563, "learning_rate": 9.57539871902561e-06, "loss": 0.3724, "step": 1412 }, { "epoch": 0.6571074252053946, "grad_norm": 0.4298678934574127, "learning_rate": 9.574306817955669e-06, "loss": 0.4176, "step": 1413 }, { "epoch": 0.6575724693845916, "grad_norm": 0.48693087697029114, "learning_rate": 9.57321357714556e-06, "loss": 0.404, "step": 1414 }, { "epoch": 0.6580375135637886, "grad_norm": 0.5552796721458435, "learning_rate": 9.572118996915482e-06, "loss": 0.4036, "step": 1415 }, { "epoch": 0.6585025577429856, "grad_norm": 0.4444204866886139, "learning_rate": 9.571023077586012e-06, "loss": 0.391, "step": 1416 }, { "epoch": 0.6589676019221826, "grad_norm": 0.46437713503837585, "learning_rate": 9.569925819478132e-06, "loss": 0.4061, "step": 1417 }, { "epoch": 0.6594326461013796, "grad_norm": 0.47912266850471497, "learning_rate": 9.56882722291321e-06, "loss": 0.3957, "step": 1418 }, { "epoch": 0.6598976902805767, "grad_norm": 0.4714813530445099, "learning_rate": 9.567727288213005e-06, "loss": 0.4099, "step": 1419 }, { "epoch": 0.6603627344597737, "grad_norm": 0.4540778398513794, "learning_rate": 9.566626015699673e-06, "loss": 0.3901, "step": 1420 }, { "epoch": 0.6608277786389707, "grad_norm": 0.47948190569877625, "learning_rate": 9.565523405695756e-06, "loss": 0.3996, "step": 1421 }, { "epoch": 0.6612928228181677, "grad_norm": 0.4244531989097595, "learning_rate": 9.564419458524193e-06, "loss": 0.3919, "step": 1422 }, { "epoch": 0.6617578669973647, "grad_norm": 0.4253584146499634, "learning_rate": 9.563314174508312e-06, "loss": 0.3817, "step": 1423 }, { "epoch": 0.6622229111765617, "grad_norm": 0.4504033923149109, "learning_rate": 9.56220755397183e-06, "loss": 0.3648, "step": 1424 }, { "epoch": 0.6626879553557588, "grad_norm": 0.4773210883140564, "learning_rate": 9.561099597238862e-06, "loss": 0.385, "step": 1425 }, { "epoch": 0.6631529995349558, "grad_norm": 0.4333602488040924, "learning_rate": 9.559990304633906e-06, "loss": 0.3873, "step": 1426 }, { "epoch": 0.6636180437141529, "grad_norm": 0.48123425245285034, "learning_rate": 9.55887967648186e-06, "loss": 0.3683, "step": 1427 }, { "epoch": 0.6640830878933499, "grad_norm": 0.5246723890304565, "learning_rate": 9.557767713108009e-06, "loss": 0.3702, "step": 1428 }, { "epoch": 0.6645481320725469, "grad_norm": 0.4679216146469116, "learning_rate": 9.556654414838025e-06, "loss": 0.3839, "step": 1429 }, { "epoch": 0.6650131762517439, "grad_norm": 0.5367023944854736, "learning_rate": 9.555539781997978e-06, "loss": 0.3777, "step": 1430 }, { "epoch": 0.665478220430941, "grad_norm": 0.48515602946281433, "learning_rate": 9.554423814914324e-06, "loss": 0.3749, "step": 1431 }, { "epoch": 0.665943264610138, "grad_norm": 0.46489590406417847, "learning_rate": 9.553306513913915e-06, "loss": 0.3707, "step": 1432 }, { "epoch": 0.666408308789335, "grad_norm": 0.4412256181240082, "learning_rate": 9.552187879323987e-06, "loss": 0.3971, "step": 1433 }, { "epoch": 0.666873352968532, "grad_norm": 0.4211561977863312, "learning_rate": 9.551067911472172e-06, "loss": 0.39, "step": 1434 }, { "epoch": 0.667338397147729, "grad_norm": 0.48854532837867737, "learning_rate": 9.549946610686488e-06, "loss": 0.4125, "step": 1435 }, { "epoch": 0.667803441326926, "grad_norm": 0.47784653306007385, "learning_rate": 9.548823977295348e-06, "loss": 0.3656, "step": 1436 }, { "epoch": 0.6682684855061231, "grad_norm": 0.4391965866088867, "learning_rate": 9.547700011627552e-06, "loss": 0.3929, "step": 1437 }, { "epoch": 0.6687335296853201, "grad_norm": 0.4571762979030609, "learning_rate": 9.546574714012291e-06, "loss": 0.4102, "step": 1438 }, { "epoch": 0.6691985738645171, "grad_norm": 0.4999452531337738, "learning_rate": 9.545448084779148e-06, "loss": 0.3953, "step": 1439 }, { "epoch": 0.6696636180437141, "grad_norm": 0.5316603779792786, "learning_rate": 9.544320124258093e-06, "loss": 0.4101, "step": 1440 }, { "epoch": 0.6701286622229112, "grad_norm": 0.502018928527832, "learning_rate": 9.543190832779488e-06, "loss": 0.394, "step": 1441 }, { "epoch": 0.6705937064021082, "grad_norm": 0.49598726630210876, "learning_rate": 9.542060210674084e-06, "loss": 0.4067, "step": 1442 }, { "epoch": 0.6710587505813053, "grad_norm": 0.5268638134002686, "learning_rate": 9.540928258273021e-06, "loss": 0.3724, "step": 1443 }, { "epoch": 0.6715237947605023, "grad_norm": 0.63950514793396, "learning_rate": 9.539794975907831e-06, "loss": 0.393, "step": 1444 }, { "epoch": 0.6719888389396993, "grad_norm": 0.45709770917892456, "learning_rate": 9.538660363910433e-06, "loss": 0.3779, "step": 1445 }, { "epoch": 0.6724538831188963, "grad_norm": 0.6071939468383789, "learning_rate": 9.537524422613135e-06, "loss": 0.3989, "step": 1446 }, { "epoch": 0.6729189272980933, "grad_norm": 0.5442386865615845, "learning_rate": 9.53638715234864e-06, "loss": 0.3666, "step": 1447 }, { "epoch": 0.6733839714772903, "grad_norm": 0.5082783102989197, "learning_rate": 9.535248553450031e-06, "loss": 0.4001, "step": 1448 }, { "epoch": 0.6738490156564874, "grad_norm": 0.5492592453956604, "learning_rate": 9.53410862625079e-06, "loss": 0.3681, "step": 1449 }, { "epoch": 0.6743140598356844, "grad_norm": 0.5500537157058716, "learning_rate": 9.532967371084778e-06, "loss": 0.4094, "step": 1450 }, { "epoch": 0.6747791040148814, "grad_norm": 0.528328001499176, "learning_rate": 9.531824788286255e-06, "loss": 0.4036, "step": 1451 }, { "epoch": 0.6752441481940784, "grad_norm": 0.49432677030563354, "learning_rate": 9.53068087818986e-06, "loss": 0.4007, "step": 1452 }, { "epoch": 0.6757091923732754, "grad_norm": 0.48525556921958923, "learning_rate": 9.52953564113063e-06, "loss": 0.3855, "step": 1453 }, { "epoch": 0.6761742365524724, "grad_norm": 0.5153108835220337, "learning_rate": 9.528389077443985e-06, "loss": 0.3863, "step": 1454 }, { "epoch": 0.6766392807316695, "grad_norm": 0.48874610662460327, "learning_rate": 9.527241187465735e-06, "loss": 0.3963, "step": 1455 }, { "epoch": 0.6771043249108666, "grad_norm": 0.5944570302963257, "learning_rate": 9.526091971532075e-06, "loss": 0.3698, "step": 1456 }, { "epoch": 0.6775693690900636, "grad_norm": 0.4809949994087219, "learning_rate": 9.524941429979597e-06, "loss": 0.3884, "step": 1457 }, { "epoch": 0.6780344132692606, "grad_norm": 0.5726730227470398, "learning_rate": 9.523789563145274e-06, "loss": 0.3955, "step": 1458 }, { "epoch": 0.6784994574484576, "grad_norm": 0.5116811394691467, "learning_rate": 9.522636371366467e-06, "loss": 0.3916, "step": 1459 }, { "epoch": 0.6789645016276546, "grad_norm": 0.4806547462940216, "learning_rate": 9.521481854980928e-06, "loss": 0.3909, "step": 1460 }, { "epoch": 0.6794295458068517, "grad_norm": 0.5462357997894287, "learning_rate": 9.520326014326799e-06, "loss": 0.3862, "step": 1461 }, { "epoch": 0.6798945899860487, "grad_norm": 0.44127699732780457, "learning_rate": 9.519168849742603e-06, "loss": 0.395, "step": 1462 }, { "epoch": 0.6803596341652457, "grad_norm": 0.5247107148170471, "learning_rate": 9.518010361567259e-06, "loss": 0.3771, "step": 1463 }, { "epoch": 0.6808246783444427, "grad_norm": 0.49513915181159973, "learning_rate": 9.516850550140064e-06, "loss": 0.3896, "step": 1464 }, { "epoch": 0.6812897225236397, "grad_norm": 0.4648475646972656, "learning_rate": 9.515689415800713e-06, "loss": 0.3866, "step": 1465 }, { "epoch": 0.6817547667028367, "grad_norm": 0.5785578489303589, "learning_rate": 9.514526958889279e-06, "loss": 0.4013, "step": 1466 }, { "epoch": 0.6822198108820338, "grad_norm": 0.5234367847442627, "learning_rate": 9.51336317974623e-06, "loss": 0.398, "step": 1467 }, { "epoch": 0.6826848550612308, "grad_norm": 0.5856658220291138, "learning_rate": 9.512198078712417e-06, "loss": 0.3752, "step": 1468 }, { "epoch": 0.6831498992404278, "grad_norm": 0.5868757367134094, "learning_rate": 9.511031656129079e-06, "loss": 0.3974, "step": 1469 }, { "epoch": 0.6836149434196248, "grad_norm": 0.5716127157211304, "learning_rate": 9.509863912337843e-06, "loss": 0.4253, "step": 1470 }, { "epoch": 0.6840799875988219, "grad_norm": 0.49978068470954895, "learning_rate": 9.50869484768072e-06, "loss": 0.4068, "step": 1471 }, { "epoch": 0.684545031778019, "grad_norm": 0.5351032614707947, "learning_rate": 9.507524462500112e-06, "loss": 0.3921, "step": 1472 }, { "epoch": 0.685010075957216, "grad_norm": 0.5618773102760315, "learning_rate": 9.506352757138806e-06, "loss": 0.3945, "step": 1473 }, { "epoch": 0.685475120136413, "grad_norm": 0.6486148834228516, "learning_rate": 9.505179731939975e-06, "loss": 0.4109, "step": 1474 }, { "epoch": 0.68594016431561, "grad_norm": 0.5250218510627747, "learning_rate": 9.504005387247178e-06, "loss": 0.413, "step": 1475 }, { "epoch": 0.686405208494807, "grad_norm": 0.5226433277130127, "learning_rate": 9.502829723404363e-06, "loss": 0.3724, "step": 1476 }, { "epoch": 0.686870252674004, "grad_norm": 0.5525023341178894, "learning_rate": 9.50165274075586e-06, "loss": 0.3952, "step": 1477 }, { "epoch": 0.687335296853201, "grad_norm": 0.5429186820983887, "learning_rate": 9.500474439646394e-06, "loss": 0.3966, "step": 1478 }, { "epoch": 0.6878003410323981, "grad_norm": 0.47291281819343567, "learning_rate": 9.499294820421064e-06, "loss": 0.415, "step": 1479 }, { "epoch": 0.6882653852115951, "grad_norm": 0.6125563383102417, "learning_rate": 9.498113883425364e-06, "loss": 0.3928, "step": 1480 }, { "epoch": 0.6887304293907921, "grad_norm": 0.48280781507492065, "learning_rate": 9.496931629005171e-06, "loss": 0.3946, "step": 1481 }, { "epoch": 0.6891954735699891, "grad_norm": 0.5750291347503662, "learning_rate": 9.49574805750675e-06, "loss": 0.3947, "step": 1482 }, { "epoch": 0.6896605177491861, "grad_norm": 0.5800142288208008, "learning_rate": 9.494563169276747e-06, "loss": 0.4352, "step": 1483 }, { "epoch": 0.6901255619283831, "grad_norm": 0.469500869512558, "learning_rate": 9.493376964662197e-06, "loss": 0.3659, "step": 1484 }, { "epoch": 0.6905906061075803, "grad_norm": 0.5604742765426636, "learning_rate": 9.492189444010522e-06, "loss": 0.3868, "step": 1485 }, { "epoch": 0.6910556502867773, "grad_norm": 0.5544446706771851, "learning_rate": 9.491000607669525e-06, "loss": 0.4125, "step": 1486 }, { "epoch": 0.6915206944659743, "grad_norm": 0.5978662371635437, "learning_rate": 9.489810455987398e-06, "loss": 0.374, "step": 1487 }, { "epoch": 0.6919857386451713, "grad_norm": 0.4866108298301697, "learning_rate": 9.488618989312719e-06, "loss": 0.3797, "step": 1488 }, { "epoch": 0.6924507828243683, "grad_norm": 0.5635210275650024, "learning_rate": 9.487426207994445e-06, "loss": 0.3627, "step": 1489 }, { "epoch": 0.6929158270035654, "grad_norm": 0.534838855266571, "learning_rate": 9.486232112381926e-06, "loss": 0.397, "step": 1490 }, { "epoch": 0.6933808711827624, "grad_norm": 0.5099875926971436, "learning_rate": 9.485036702824892e-06, "loss": 0.397, "step": 1491 }, { "epoch": 0.6938459153619594, "grad_norm": 0.6172494888305664, "learning_rate": 9.483839979673459e-06, "loss": 0.4001, "step": 1492 }, { "epoch": 0.6943109595411564, "grad_norm": 0.5031498670578003, "learning_rate": 9.482641943278127e-06, "loss": 0.395, "step": 1493 }, { "epoch": 0.6947760037203534, "grad_norm": 0.5278390645980835, "learning_rate": 9.481442593989781e-06, "loss": 0.3882, "step": 1494 }, { "epoch": 0.6952410478995504, "grad_norm": 0.564165472984314, "learning_rate": 9.480241932159692e-06, "loss": 0.3947, "step": 1495 }, { "epoch": 0.6957060920787475, "grad_norm": 0.47055506706237793, "learning_rate": 9.479039958139516e-06, "loss": 0.3843, "step": 1496 }, { "epoch": 0.6961711362579445, "grad_norm": 0.6169893741607666, "learning_rate": 9.477836672281291e-06, "loss": 0.3824, "step": 1497 }, { "epoch": 0.6966361804371415, "grad_norm": 0.4662085473537445, "learning_rate": 9.476632074937438e-06, "loss": 0.4037, "step": 1498 }, { "epoch": 0.6971012246163385, "grad_norm": 0.5007343888282776, "learning_rate": 9.475426166460763e-06, "loss": 0.3552, "step": 1499 }, { "epoch": 0.6975662687955356, "grad_norm": 0.5537876486778259, "learning_rate": 9.47421894720446e-06, "loss": 0.3963, "step": 1500 }, { "epoch": 0.6980313129747326, "grad_norm": 0.49283310770988464, "learning_rate": 9.473010417522104e-06, "loss": 0.3937, "step": 1501 }, { "epoch": 0.6984963571539297, "grad_norm": 0.5946938395500183, "learning_rate": 9.471800577767651e-06, "loss": 0.3875, "step": 1502 }, { "epoch": 0.6989614013331267, "grad_norm": 0.4660334885120392, "learning_rate": 9.470589428295444e-06, "loss": 0.3862, "step": 1503 }, { "epoch": 0.6994264455123237, "grad_norm": 0.5642980337142944, "learning_rate": 9.469376969460212e-06, "loss": 0.3944, "step": 1504 }, { "epoch": 0.6998914896915207, "grad_norm": 0.5256946682929993, "learning_rate": 9.468163201617063e-06, "loss": 0.3991, "step": 1505 }, { "epoch": 0.7003565338707177, "grad_norm": 0.5281638503074646, "learning_rate": 9.466948125121486e-06, "loss": 0.4028, "step": 1506 }, { "epoch": 0.7008215780499147, "grad_norm": 0.5681607723236084, "learning_rate": 9.465731740329364e-06, "loss": 0.3888, "step": 1507 }, { "epoch": 0.7012866222291118, "grad_norm": 0.5455275177955627, "learning_rate": 9.46451404759695e-06, "loss": 0.3982, "step": 1508 }, { "epoch": 0.7017516664083088, "grad_norm": 0.49493181705474854, "learning_rate": 9.463295047280892e-06, "loss": 0.4045, "step": 1509 }, { "epoch": 0.7022167105875058, "grad_norm": 0.574228048324585, "learning_rate": 9.462074739738212e-06, "loss": 0.3897, "step": 1510 }, { "epoch": 0.7026817547667028, "grad_norm": 0.5371845364570618, "learning_rate": 9.460853125326317e-06, "loss": 0.3884, "step": 1511 }, { "epoch": 0.7031467989458998, "grad_norm": 0.5111741423606873, "learning_rate": 9.459630204403001e-06, "loss": 0.3557, "step": 1512 }, { "epoch": 0.7036118431250968, "grad_norm": 0.5982129573822021, "learning_rate": 9.458405977326436e-06, "loss": 0.405, "step": 1513 }, { "epoch": 0.7040768873042939, "grad_norm": 0.48484864830970764, "learning_rate": 9.45718044445518e-06, "loss": 0.3856, "step": 1514 }, { "epoch": 0.704541931483491, "grad_norm": 0.4435729682445526, "learning_rate": 9.455953606148172e-06, "loss": 0.3944, "step": 1515 }, { "epoch": 0.705006975662688, "grad_norm": 0.5365220308303833, "learning_rate": 9.454725462764729e-06, "loss": 0.3986, "step": 1516 }, { "epoch": 0.705472019841885, "grad_norm": 0.5052123069763184, "learning_rate": 9.453496014664557e-06, "loss": 0.3782, "step": 1517 }, { "epoch": 0.705937064021082, "grad_norm": 0.4664888381958008, "learning_rate": 9.452265262207741e-06, "loss": 0.3802, "step": 1518 }, { "epoch": 0.706402108200279, "grad_norm": 0.5673739314079285, "learning_rate": 9.451033205754749e-06, "loss": 0.4167, "step": 1519 }, { "epoch": 0.7068671523794761, "grad_norm": 0.5380971431732178, "learning_rate": 9.44979984566643e-06, "loss": 0.3781, "step": 1520 }, { "epoch": 0.7073321965586731, "grad_norm": 0.4242795407772064, "learning_rate": 9.448565182304015e-06, "loss": 0.3824, "step": 1521 }, { "epoch": 0.7077972407378701, "grad_norm": 0.6041889786720276, "learning_rate": 9.447329216029117e-06, "loss": 0.394, "step": 1522 }, { "epoch": 0.7082622849170671, "grad_norm": 0.4233911633491516, "learning_rate": 9.44609194720373e-06, "loss": 0.3657, "step": 1523 }, { "epoch": 0.7087273290962641, "grad_norm": 0.542805552482605, "learning_rate": 9.44485337619023e-06, "loss": 0.3966, "step": 1524 }, { "epoch": 0.7091923732754611, "grad_norm": 0.5100137591362, "learning_rate": 9.443613503351375e-06, "loss": 0.3737, "step": 1525 }, { "epoch": 0.7096574174546582, "grad_norm": 0.4733894169330597, "learning_rate": 9.442372329050304e-06, "loss": 0.3776, "step": 1526 }, { "epoch": 0.7101224616338552, "grad_norm": 0.5760788917541504, "learning_rate": 9.441129853650534e-06, "loss": 0.3838, "step": 1527 }, { "epoch": 0.7105875058130522, "grad_norm": 0.5053048729896545, "learning_rate": 9.43988607751597e-06, "loss": 0.3741, "step": 1528 }, { "epoch": 0.7110525499922493, "grad_norm": 0.6125737428665161, "learning_rate": 9.43864100101089e-06, "loss": 0.3941, "step": 1529 }, { "epoch": 0.7115175941714463, "grad_norm": 0.6082503795623779, "learning_rate": 9.437394624499957e-06, "loss": 0.3769, "step": 1530 }, { "epoch": 0.7119826383506433, "grad_norm": 0.5114914178848267, "learning_rate": 9.43614694834822e-06, "loss": 0.3972, "step": 1531 }, { "epoch": 0.7124476825298404, "grad_norm": 0.5447007417678833, "learning_rate": 9.434897972921095e-06, "loss": 0.3758, "step": 1532 }, { "epoch": 0.7129127267090374, "grad_norm": 0.4863920509815216, "learning_rate": 9.433647698584393e-06, "loss": 0.385, "step": 1533 }, { "epoch": 0.7133777708882344, "grad_norm": 0.5778424739837646, "learning_rate": 9.432396125704294e-06, "loss": 0.3576, "step": 1534 }, { "epoch": 0.7138428150674314, "grad_norm": 0.541344940662384, "learning_rate": 9.431143254647368e-06, "loss": 0.4063, "step": 1535 }, { "epoch": 0.7143078592466284, "grad_norm": 0.5227439999580383, "learning_rate": 9.429889085780559e-06, "loss": 0.4045, "step": 1536 }, { "epoch": 0.7147729034258254, "grad_norm": 0.5893809795379639, "learning_rate": 9.42863361947119e-06, "loss": 0.3556, "step": 1537 }, { "epoch": 0.7152379476050225, "grad_norm": 0.45113706588745117, "learning_rate": 9.42737685608697e-06, "loss": 0.3984, "step": 1538 }, { "epoch": 0.7157029917842195, "grad_norm": 0.6279287338256836, "learning_rate": 9.426118795995984e-06, "loss": 0.368, "step": 1539 }, { "epoch": 0.7161680359634165, "grad_norm": 0.45758605003356934, "learning_rate": 9.424859439566696e-06, "loss": 0.378, "step": 1540 }, { "epoch": 0.7166330801426135, "grad_norm": 0.5116802453994751, "learning_rate": 9.423598787167952e-06, "loss": 0.3775, "step": 1541 }, { "epoch": 0.7170981243218105, "grad_norm": 0.5365654826164246, "learning_rate": 9.422336839168974e-06, "loss": 0.3701, "step": 1542 }, { "epoch": 0.7175631685010075, "grad_norm": 0.4998980760574341, "learning_rate": 9.421073595939373e-06, "loss": 0.3778, "step": 1543 }, { "epoch": 0.7180282126802047, "grad_norm": 0.5392967462539673, "learning_rate": 9.419809057849125e-06, "loss": 0.4003, "step": 1544 }, { "epoch": 0.7184932568594017, "grad_norm": 0.5534929633140564, "learning_rate": 9.418543225268598e-06, "loss": 0.3992, "step": 1545 }, { "epoch": 0.7189583010385987, "grad_norm": 0.45041725039482117, "learning_rate": 9.41727609856853e-06, "loss": 0.3951, "step": 1546 }, { "epoch": 0.7194233452177957, "grad_norm": 0.5060805082321167, "learning_rate": 9.416007678120041e-06, "loss": 0.3657, "step": 1547 }, { "epoch": 0.7198883893969927, "grad_norm": 0.4890674352645874, "learning_rate": 9.414737964294636e-06, "loss": 0.3763, "step": 1548 }, { "epoch": 0.7203534335761898, "grad_norm": 0.48126208782196045, "learning_rate": 9.41346695746419e-06, "loss": 0.403, "step": 1549 }, { "epoch": 0.7208184777553868, "grad_norm": 0.42710211873054504, "learning_rate": 9.41219465800096e-06, "loss": 0.408, "step": 1550 }, { "epoch": 0.7212835219345838, "grad_norm": 0.5024427771568298, "learning_rate": 9.410921066277583e-06, "loss": 0.3946, "step": 1551 }, { "epoch": 0.7217485661137808, "grad_norm": 0.5658032298088074, "learning_rate": 9.409646182667073e-06, "loss": 0.4095, "step": 1552 }, { "epoch": 0.7222136102929778, "grad_norm": 0.46872782707214355, "learning_rate": 9.408370007542822e-06, "loss": 0.3693, "step": 1553 }, { "epoch": 0.7226786544721748, "grad_norm": 0.5501477718353271, "learning_rate": 9.407092541278602e-06, "loss": 0.3939, "step": 1554 }, { "epoch": 0.7231436986513718, "grad_norm": 0.5214014649391174, "learning_rate": 9.405813784248562e-06, "loss": 0.3936, "step": 1555 }, { "epoch": 0.7236087428305689, "grad_norm": 0.47471073269844055, "learning_rate": 9.40453373682723e-06, "loss": 0.3869, "step": 1556 }, { "epoch": 0.7240737870097659, "grad_norm": 0.5597638487815857, "learning_rate": 9.403252399389508e-06, "loss": 0.3925, "step": 1557 }, { "epoch": 0.724538831188963, "grad_norm": 0.43586617708206177, "learning_rate": 9.401969772310681e-06, "loss": 0.3636, "step": 1558 }, { "epoch": 0.72500387536816, "grad_norm": 0.5254845023155212, "learning_rate": 9.400685855966411e-06, "loss": 0.3647, "step": 1559 }, { "epoch": 0.725468919547357, "grad_norm": 0.5308626294136047, "learning_rate": 9.399400650732735e-06, "loss": 0.3843, "step": 1560 }, { "epoch": 0.725933963726554, "grad_norm": 0.4976446032524109, "learning_rate": 9.398114156986068e-06, "loss": 0.4107, "step": 1561 }, { "epoch": 0.7263990079057511, "grad_norm": 0.47500959038734436, "learning_rate": 9.396826375103203e-06, "loss": 0.3745, "step": 1562 }, { "epoch": 0.7268640520849481, "grad_norm": 0.5488372445106506, "learning_rate": 9.395537305461312e-06, "loss": 0.3783, "step": 1563 }, { "epoch": 0.7273290962641451, "grad_norm": 0.48007380962371826, "learning_rate": 9.394246948437943e-06, "loss": 0.3835, "step": 1564 }, { "epoch": 0.7277941404433421, "grad_norm": 0.5635498762130737, "learning_rate": 9.392955304411015e-06, "loss": 0.38, "step": 1565 }, { "epoch": 0.7282591846225391, "grad_norm": 0.48347046971321106, "learning_rate": 9.391662373758836e-06, "loss": 0.4036, "step": 1566 }, { "epoch": 0.7287242288017362, "grad_norm": 0.448574423789978, "learning_rate": 9.390368156860083e-06, "loss": 0.3928, "step": 1567 }, { "epoch": 0.7291892729809332, "grad_norm": 0.4473694860935211, "learning_rate": 9.389072654093809e-06, "loss": 0.3579, "step": 1568 }, { "epoch": 0.7296543171601302, "grad_norm": 0.48732516169548035, "learning_rate": 9.387775865839449e-06, "loss": 0.3803, "step": 1569 }, { "epoch": 0.7301193613393272, "grad_norm": 0.46211230754852295, "learning_rate": 9.386477792476806e-06, "loss": 0.4012, "step": 1570 }, { "epoch": 0.7305844055185242, "grad_norm": 0.4076167643070221, "learning_rate": 9.38517843438607e-06, "loss": 0.3788, "step": 1571 }, { "epoch": 0.7310494496977212, "grad_norm": 0.5187703967094421, "learning_rate": 9.383877791947802e-06, "loss": 0.4111, "step": 1572 }, { "epoch": 0.7315144938769184, "grad_norm": 0.5166497230529785, "learning_rate": 9.382575865542933e-06, "loss": 0.3982, "step": 1573 }, { "epoch": 0.7319795380561154, "grad_norm": 0.4667831063270569, "learning_rate": 9.38127265555278e-06, "loss": 0.4002, "step": 1574 }, { "epoch": 0.7324445822353124, "grad_norm": 0.4515557289123535, "learning_rate": 9.379968162359034e-06, "loss": 0.3646, "step": 1575 }, { "epoch": 0.7329096264145094, "grad_norm": 0.48344147205352783, "learning_rate": 9.378662386343758e-06, "loss": 0.4046, "step": 1576 }, { "epoch": 0.7333746705937064, "grad_norm": 0.505967915058136, "learning_rate": 9.377355327889391e-06, "loss": 0.3664, "step": 1577 }, { "epoch": 0.7338397147729034, "grad_norm": 0.43122488260269165, "learning_rate": 9.37604698737875e-06, "loss": 0.3962, "step": 1578 }, { "epoch": 0.7343047589521005, "grad_norm": 0.4845030903816223, "learning_rate": 9.374737365195028e-06, "loss": 0.3937, "step": 1579 }, { "epoch": 0.7347698031312975, "grad_norm": 0.4910258948802948, "learning_rate": 9.37342646172179e-06, "loss": 0.3892, "step": 1580 }, { "epoch": 0.7352348473104945, "grad_norm": 0.4109004735946655, "learning_rate": 9.372114277342981e-06, "loss": 0.3735, "step": 1581 }, { "epoch": 0.7356998914896915, "grad_norm": 0.4791964888572693, "learning_rate": 9.370800812442917e-06, "loss": 0.3956, "step": 1582 }, { "epoch": 0.7361649356688885, "grad_norm": 0.4479603171348572, "learning_rate": 9.36948606740629e-06, "loss": 0.3587, "step": 1583 }, { "epoch": 0.7366299798480855, "grad_norm": 0.49391624331474304, "learning_rate": 9.36817004261817e-06, "loss": 0.3785, "step": 1584 }, { "epoch": 0.7370950240272826, "grad_norm": 0.4788481593132019, "learning_rate": 9.366852738463995e-06, "loss": 0.395, "step": 1585 }, { "epoch": 0.7375600682064796, "grad_norm": 0.4550861120223999, "learning_rate": 9.365534155329585e-06, "loss": 0.3826, "step": 1586 }, { "epoch": 0.7380251123856766, "grad_norm": 0.45728829503059387, "learning_rate": 9.364214293601133e-06, "loss": 0.3662, "step": 1587 }, { "epoch": 0.7384901565648737, "grad_norm": 0.46696847677230835, "learning_rate": 9.3628931536652e-06, "loss": 0.3751, "step": 1588 }, { "epoch": 0.7389552007440707, "grad_norm": 0.48484426736831665, "learning_rate": 9.361570735908731e-06, "loss": 0.3764, "step": 1589 }, { "epoch": 0.7394202449232677, "grad_norm": 0.4298027455806732, "learning_rate": 9.36024704071904e-06, "loss": 0.3879, "step": 1590 }, { "epoch": 0.7398852891024648, "grad_norm": 0.5281281471252441, "learning_rate": 9.358922068483813e-06, "loss": 0.3742, "step": 1591 }, { "epoch": 0.7403503332816618, "grad_norm": 0.39582934975624084, "learning_rate": 9.357595819591116e-06, "loss": 0.3886, "step": 1592 }, { "epoch": 0.7408153774608588, "grad_norm": 0.4835725724697113, "learning_rate": 9.356268294429384e-06, "loss": 0.3904, "step": 1593 }, { "epoch": 0.7412804216400558, "grad_norm": 0.49166139960289, "learning_rate": 9.354939493387428e-06, "loss": 0.4038, "step": 1594 }, { "epoch": 0.7417454658192528, "grad_norm": 0.4498700499534607, "learning_rate": 9.353609416854432e-06, "loss": 0.3906, "step": 1595 }, { "epoch": 0.7422105099984498, "grad_norm": 0.5919501781463623, "learning_rate": 9.352278065219955e-06, "loss": 0.3753, "step": 1596 }, { "epoch": 0.7426755541776469, "grad_norm": 0.5172655582427979, "learning_rate": 9.350945438873927e-06, "loss": 0.3858, "step": 1597 }, { "epoch": 0.7431405983568439, "grad_norm": 0.4497489333152771, "learning_rate": 9.349611538206654e-06, "loss": 0.3979, "step": 1598 }, { "epoch": 0.7436056425360409, "grad_norm": 0.4453744888305664, "learning_rate": 9.348276363608812e-06, "loss": 0.3854, "step": 1599 }, { "epoch": 0.7440706867152379, "grad_norm": 0.5212786793708801, "learning_rate": 9.346939915471453e-06, "loss": 0.3838, "step": 1600 }, { "epoch": 0.7445357308944349, "grad_norm": 0.5124901533126831, "learning_rate": 9.345602194186001e-06, "loss": 0.3859, "step": 1601 }, { "epoch": 0.745000775073632, "grad_norm": 0.5541741847991943, "learning_rate": 9.344263200144253e-06, "loss": 0.3653, "step": 1602 }, { "epoch": 0.7454658192528291, "grad_norm": 0.5633660554885864, "learning_rate": 9.342922933738377e-06, "loss": 0.3784, "step": 1603 }, { "epoch": 0.7459308634320261, "grad_norm": 0.4769454300403595, "learning_rate": 9.341581395360917e-06, "loss": 0.389, "step": 1604 }, { "epoch": 0.7463959076112231, "grad_norm": 0.48664960265159607, "learning_rate": 9.340238585404787e-06, "loss": 0.3782, "step": 1605 }, { "epoch": 0.7468609517904201, "grad_norm": 0.45407751202583313, "learning_rate": 9.338894504263276e-06, "loss": 0.38, "step": 1606 }, { "epoch": 0.7473259959696171, "grad_norm": 0.47645387053489685, "learning_rate": 9.33754915233004e-06, "loss": 0.3541, "step": 1607 }, { "epoch": 0.7477910401488141, "grad_norm": 0.5349624752998352, "learning_rate": 9.336202529999114e-06, "loss": 0.3867, "step": 1608 }, { "epoch": 0.7482560843280112, "grad_norm": 0.49598512053489685, "learning_rate": 9.3348546376649e-06, "loss": 0.3848, "step": 1609 }, { "epoch": 0.7487211285072082, "grad_norm": 0.5270540714263916, "learning_rate": 9.333505475722175e-06, "loss": 0.3953, "step": 1610 }, { "epoch": 0.7491861726864052, "grad_norm": 0.4383714199066162, "learning_rate": 9.332155044566085e-06, "loss": 0.3936, "step": 1611 }, { "epoch": 0.7496512168656022, "grad_norm": 0.4920140206813812, "learning_rate": 9.330803344592151e-06, "loss": 0.3805, "step": 1612 }, { "epoch": 0.7501162610447992, "grad_norm": 0.5141205787658691, "learning_rate": 9.329450376196264e-06, "loss": 0.3821, "step": 1613 }, { "epoch": 0.7505813052239962, "grad_norm": 0.5176628828048706, "learning_rate": 9.328096139774686e-06, "loss": 0.3903, "step": 1614 }, { "epoch": 0.7510463494031933, "grad_norm": 0.5027486085891724, "learning_rate": 9.326740635724047e-06, "loss": 0.3814, "step": 1615 }, { "epoch": 0.7515113935823903, "grad_norm": 0.504041314125061, "learning_rate": 9.32538386444136e-06, "loss": 0.3701, "step": 1616 }, { "epoch": 0.7519764377615874, "grad_norm": 0.4461967647075653, "learning_rate": 9.324025826323995e-06, "loss": 0.3603, "step": 1617 }, { "epoch": 0.7524414819407844, "grad_norm": 0.4477754831314087, "learning_rate": 9.3226665217697e-06, "loss": 0.3585, "step": 1618 }, { "epoch": 0.7529065261199814, "grad_norm": 0.48952674865722656, "learning_rate": 9.321305951176597e-06, "loss": 0.3974, "step": 1619 }, { "epoch": 0.7533715702991785, "grad_norm": 0.4486616253852844, "learning_rate": 9.319944114943171e-06, "loss": 0.3745, "step": 1620 }, { "epoch": 0.7538366144783755, "grad_norm": 0.5282906293869019, "learning_rate": 9.318581013468285e-06, "loss": 0.3743, "step": 1621 }, { "epoch": 0.7543016586575725, "grad_norm": 0.5025021433830261, "learning_rate": 9.317216647151166e-06, "loss": 0.385, "step": 1622 }, { "epoch": 0.7547667028367695, "grad_norm": 0.48723548650741577, "learning_rate": 9.315851016391417e-06, "loss": 0.3891, "step": 1623 }, { "epoch": 0.7552317470159665, "grad_norm": 0.5467572212219238, "learning_rate": 9.31448412158901e-06, "loss": 0.3931, "step": 1624 }, { "epoch": 0.7556967911951635, "grad_norm": 0.503555178642273, "learning_rate": 9.313115963144281e-06, "loss": 0.3466, "step": 1625 }, { "epoch": 0.7561618353743605, "grad_norm": 0.570252537727356, "learning_rate": 9.311746541457946e-06, "loss": 0.3588, "step": 1626 }, { "epoch": 0.7566268795535576, "grad_norm": 0.49849367141723633, "learning_rate": 9.310375856931086e-06, "loss": 0.4005, "step": 1627 }, { "epoch": 0.7570919237327546, "grad_norm": 0.4657578766345978, "learning_rate": 9.309003909965152e-06, "loss": 0.3811, "step": 1628 }, { "epoch": 0.7575569679119516, "grad_norm": 0.5395064353942871, "learning_rate": 9.307630700961966e-06, "loss": 0.4136, "step": 1629 }, { "epoch": 0.7580220120911486, "grad_norm": 0.43396592140197754, "learning_rate": 9.306256230323714e-06, "loss": 0.3834, "step": 1630 }, { "epoch": 0.7584870562703456, "grad_norm": 0.4718068838119507, "learning_rate": 9.304880498452962e-06, "loss": 0.3759, "step": 1631 }, { "epoch": 0.7589521004495428, "grad_norm": 0.49737459421157837, "learning_rate": 9.303503505752636e-06, "loss": 0.3928, "step": 1632 }, { "epoch": 0.7594171446287398, "grad_norm": 0.4730997681617737, "learning_rate": 9.302125252626035e-06, "loss": 0.3916, "step": 1633 }, { "epoch": 0.7598821888079368, "grad_norm": 0.49301835894584656, "learning_rate": 9.30074573947683e-06, "loss": 0.3821, "step": 1634 }, { "epoch": 0.7603472329871338, "grad_norm": 0.5187510251998901, "learning_rate": 9.299364966709051e-06, "loss": 0.3527, "step": 1635 }, { "epoch": 0.7608122771663308, "grad_norm": 0.5241015553474426, "learning_rate": 9.29798293472711e-06, "loss": 0.378, "step": 1636 }, { "epoch": 0.7612773213455278, "grad_norm": 0.4729224443435669, "learning_rate": 9.296599643935782e-06, "loss": 0.4126, "step": 1637 }, { "epoch": 0.7617423655247249, "grad_norm": 0.6277045011520386, "learning_rate": 9.295215094740208e-06, "loss": 0.4036, "step": 1638 }, { "epoch": 0.7622074097039219, "grad_norm": 0.4564441740512848, "learning_rate": 9.293829287545902e-06, "loss": 0.3894, "step": 1639 }, { "epoch": 0.7626724538831189, "grad_norm": 0.48436111211776733, "learning_rate": 9.292442222758741e-06, "loss": 0.3695, "step": 1640 }, { "epoch": 0.7631374980623159, "grad_norm": 0.5133222341537476, "learning_rate": 9.291053900784977e-06, "loss": 0.3843, "step": 1641 }, { "epoch": 0.7636025422415129, "grad_norm": 0.4502391517162323, "learning_rate": 9.289664322031225e-06, "loss": 0.3745, "step": 1642 }, { "epoch": 0.7640675864207099, "grad_norm": 0.5892338156700134, "learning_rate": 9.28827348690447e-06, "loss": 0.3855, "step": 1643 }, { "epoch": 0.764532630599907, "grad_norm": 0.4681336283683777, "learning_rate": 9.286881395812066e-06, "loss": 0.3817, "step": 1644 }, { "epoch": 0.764997674779104, "grad_norm": 0.5281332731246948, "learning_rate": 9.285488049161735e-06, "loss": 0.3903, "step": 1645 }, { "epoch": 0.7654627189583011, "grad_norm": 0.5051922798156738, "learning_rate": 9.284093447361563e-06, "loss": 0.4035, "step": 1646 }, { "epoch": 0.7659277631374981, "grad_norm": 0.4733637273311615, "learning_rate": 9.282697590820008e-06, "loss": 0.3933, "step": 1647 }, { "epoch": 0.7663928073166951, "grad_norm": 0.43565672636032104, "learning_rate": 9.281300479945894e-06, "loss": 0.3654, "step": 1648 }, { "epoch": 0.7668578514958921, "grad_norm": 0.49669885635375977, "learning_rate": 9.27990211514841e-06, "loss": 0.3514, "step": 1649 }, { "epoch": 0.7673228956750892, "grad_norm": 0.4607134163379669, "learning_rate": 9.278502496837116e-06, "loss": 0.3954, "step": 1650 }, { "epoch": 0.7677879398542862, "grad_norm": 0.5022844672203064, "learning_rate": 9.277101625421938e-06, "loss": 0.3917, "step": 1651 }, { "epoch": 0.7682529840334832, "grad_norm": 0.5731377601623535, "learning_rate": 9.275699501313164e-06, "loss": 0.4014, "step": 1652 }, { "epoch": 0.7687180282126802, "grad_norm": 0.44675296545028687, "learning_rate": 9.27429612492146e-06, "loss": 0.3754, "step": 1653 }, { "epoch": 0.7691830723918772, "grad_norm": 0.6787543296813965, "learning_rate": 9.27289149665785e-06, "loss": 0.3779, "step": 1654 }, { "epoch": 0.7696481165710742, "grad_norm": 0.6024219989776611, "learning_rate": 9.271485616933725e-06, "loss": 0.3736, "step": 1655 }, { "epoch": 0.7701131607502713, "grad_norm": 0.49429091811180115, "learning_rate": 9.270078486160843e-06, "loss": 0.3944, "step": 1656 }, { "epoch": 0.7705782049294683, "grad_norm": 0.6208419799804688, "learning_rate": 9.268670104751334e-06, "loss": 0.3796, "step": 1657 }, { "epoch": 0.7710432491086653, "grad_norm": 0.5730566382408142, "learning_rate": 9.267260473117687e-06, "loss": 0.3936, "step": 1658 }, { "epoch": 0.7715082932878623, "grad_norm": 0.4064892828464508, "learning_rate": 9.265849591672762e-06, "loss": 0.3665, "step": 1659 }, { "epoch": 0.7719733374670593, "grad_norm": 0.6317027807235718, "learning_rate": 9.264437460829783e-06, "loss": 0.3973, "step": 1660 }, { "epoch": 0.7724383816462564, "grad_norm": 0.4885081350803375, "learning_rate": 9.263024081002338e-06, "loss": 0.401, "step": 1661 }, { "epoch": 0.7729034258254535, "grad_norm": 0.4672366976737976, "learning_rate": 9.261609452604387e-06, "loss": 0.3705, "step": 1662 }, { "epoch": 0.7733684700046505, "grad_norm": 0.5605355501174927, "learning_rate": 9.260193576050247e-06, "loss": 0.4115, "step": 1663 }, { "epoch": 0.7738335141838475, "grad_norm": 0.5084188580513, "learning_rate": 9.25877645175461e-06, "loss": 0.3905, "step": 1664 }, { "epoch": 0.7742985583630445, "grad_norm": 0.4235016107559204, "learning_rate": 9.257358080132524e-06, "loss": 0.3808, "step": 1665 }, { "epoch": 0.7747636025422415, "grad_norm": 0.5295476913452148, "learning_rate": 9.25593846159941e-06, "loss": 0.3704, "step": 1666 }, { "epoch": 0.7752286467214385, "grad_norm": 0.4431585669517517, "learning_rate": 9.25451759657105e-06, "loss": 0.3784, "step": 1667 }, { "epoch": 0.7756936909006356, "grad_norm": 0.43355417251586914, "learning_rate": 9.253095485463594e-06, "loss": 0.3708, "step": 1668 }, { "epoch": 0.7761587350798326, "grad_norm": 0.5399253368377686, "learning_rate": 9.251672128693553e-06, "loss": 0.3887, "step": 1669 }, { "epoch": 0.7766237792590296, "grad_norm": 0.47674787044525146, "learning_rate": 9.250247526677806e-06, "loss": 0.4136, "step": 1670 }, { "epoch": 0.7770888234382266, "grad_norm": 0.39808011054992676, "learning_rate": 9.248821679833596e-06, "loss": 0.3894, "step": 1671 }, { "epoch": 0.7775538676174236, "grad_norm": 0.5415627360343933, "learning_rate": 9.24739458857853e-06, "loss": 0.3698, "step": 1672 }, { "epoch": 0.7780189117966206, "grad_norm": 0.4286542534828186, "learning_rate": 9.245966253330581e-06, "loss": 0.3781, "step": 1673 }, { "epoch": 0.7784839559758177, "grad_norm": 0.42257702350616455, "learning_rate": 9.244536674508085e-06, "loss": 0.3868, "step": 1674 }, { "epoch": 0.7789490001550148, "grad_norm": 0.41875237226486206, "learning_rate": 9.243105852529739e-06, "loss": 0.3594, "step": 1675 }, { "epoch": 0.7794140443342118, "grad_norm": 0.45857277512550354, "learning_rate": 9.241673787814612e-06, "loss": 0.4015, "step": 1676 }, { "epoch": 0.7798790885134088, "grad_norm": 0.4585971236228943, "learning_rate": 9.24024048078213e-06, "loss": 0.3895, "step": 1677 }, { "epoch": 0.7803441326926058, "grad_norm": 0.4482724666595459, "learning_rate": 9.238805931852088e-06, "loss": 0.3787, "step": 1678 }, { "epoch": 0.7808091768718028, "grad_norm": 0.4379400312900543, "learning_rate": 9.237370141444636e-06, "loss": 0.3712, "step": 1679 }, { "epoch": 0.7812742210509999, "grad_norm": 0.452578604221344, "learning_rate": 9.235933109980302e-06, "loss": 0.4232, "step": 1680 }, { "epoch": 0.7817392652301969, "grad_norm": 0.4528006613254547, "learning_rate": 9.234494837879963e-06, "loss": 0.3626, "step": 1681 }, { "epoch": 0.7822043094093939, "grad_norm": 0.4689446985721588, "learning_rate": 9.233055325564869e-06, "loss": 0.386, "step": 1682 }, { "epoch": 0.7826693535885909, "grad_norm": 0.4909113645553589, "learning_rate": 9.231614573456628e-06, "loss": 0.3899, "step": 1683 }, { "epoch": 0.7831343977677879, "grad_norm": 0.45796748995780945, "learning_rate": 9.230172581977212e-06, "loss": 0.3907, "step": 1684 }, { "epoch": 0.7835994419469849, "grad_norm": 0.4643401801586151, "learning_rate": 9.22872935154896e-06, "loss": 0.3749, "step": 1685 }, { "epoch": 0.784064486126182, "grad_norm": 0.5391439199447632, "learning_rate": 9.227284882594567e-06, "loss": 0.3901, "step": 1686 }, { "epoch": 0.784529530305379, "grad_norm": 0.4336642920970917, "learning_rate": 9.225839175537096e-06, "loss": 0.3952, "step": 1687 }, { "epoch": 0.784994574484576, "grad_norm": 0.4696858823299408, "learning_rate": 9.224392230799972e-06, "loss": 0.3925, "step": 1688 }, { "epoch": 0.785459618663773, "grad_norm": 0.43825915455818176, "learning_rate": 9.222944048806982e-06, "loss": 0.3605, "step": 1689 }, { "epoch": 0.7859246628429701, "grad_norm": 0.5373666286468506, "learning_rate": 9.221494629982274e-06, "loss": 0.3634, "step": 1690 }, { "epoch": 0.7863897070221672, "grad_norm": 0.4807939827442169, "learning_rate": 9.22004397475036e-06, "loss": 0.3908, "step": 1691 }, { "epoch": 0.7868547512013642, "grad_norm": 0.462829053401947, "learning_rate": 9.21859208353611e-06, "loss": 0.351, "step": 1692 }, { "epoch": 0.7873197953805612, "grad_norm": 0.49336567521095276, "learning_rate": 9.217138956764764e-06, "loss": 0.3722, "step": 1693 }, { "epoch": 0.7877848395597582, "grad_norm": 0.5379830002784729, "learning_rate": 9.215684594861915e-06, "loss": 0.3967, "step": 1694 }, { "epoch": 0.7882498837389552, "grad_norm": 0.4929734766483307, "learning_rate": 9.214228998253526e-06, "loss": 0.3666, "step": 1695 }, { "epoch": 0.7887149279181522, "grad_norm": 0.5308467149734497, "learning_rate": 9.212772167365915e-06, "loss": 0.3863, "step": 1696 }, { "epoch": 0.7891799720973492, "grad_norm": 0.43540507555007935, "learning_rate": 9.211314102625768e-06, "loss": 0.4, "step": 1697 }, { "epoch": 0.7896450162765463, "grad_norm": 0.5050954222679138, "learning_rate": 9.209854804460121e-06, "loss": 0.3893, "step": 1698 }, { "epoch": 0.7901100604557433, "grad_norm": 0.5224504470825195, "learning_rate": 9.208394273296387e-06, "loss": 0.3957, "step": 1699 }, { "epoch": 0.7905751046349403, "grad_norm": 0.5353153944015503, "learning_rate": 9.206932509562325e-06, "loss": 0.3839, "step": 1700 }, { "epoch": 0.7910401488141373, "grad_norm": 0.5350932478904724, "learning_rate": 9.205469513686065e-06, "loss": 0.4252, "step": 1701 }, { "epoch": 0.7915051929933343, "grad_norm": 0.5694266557693481, "learning_rate": 9.204005286096095e-06, "loss": 0.3813, "step": 1702 }, { "epoch": 0.7919702371725313, "grad_norm": 0.5682549476623535, "learning_rate": 9.202539827221264e-06, "loss": 0.3519, "step": 1703 }, { "epoch": 0.7924352813517284, "grad_norm": 0.4279302656650543, "learning_rate": 9.20107313749078e-06, "loss": 0.3863, "step": 1704 }, { "epoch": 0.7929003255309255, "grad_norm": 0.5173167586326599, "learning_rate": 9.19960521733421e-06, "loss": 0.3747, "step": 1705 }, { "epoch": 0.7933653697101225, "grad_norm": 0.5256390571594238, "learning_rate": 9.198136067181491e-06, "loss": 0.3963, "step": 1706 }, { "epoch": 0.7938304138893195, "grad_norm": 0.5231375098228455, "learning_rate": 9.196665687462906e-06, "loss": 0.3706, "step": 1707 }, { "epoch": 0.7942954580685165, "grad_norm": 0.4618844985961914, "learning_rate": 9.19519407860911e-06, "loss": 0.3796, "step": 1708 }, { "epoch": 0.7947605022477136, "grad_norm": 0.49417373538017273, "learning_rate": 9.193721241051108e-06, "loss": 0.3847, "step": 1709 }, { "epoch": 0.7952255464269106, "grad_norm": 0.5576480031013489, "learning_rate": 9.192247175220276e-06, "loss": 0.3747, "step": 1710 }, { "epoch": 0.7956905906061076, "grad_norm": 0.5018484592437744, "learning_rate": 9.190771881548343e-06, "loss": 0.4008, "step": 1711 }, { "epoch": 0.7961556347853046, "grad_norm": 0.48867353796958923, "learning_rate": 9.189295360467397e-06, "loss": 0.3806, "step": 1712 }, { "epoch": 0.7966206789645016, "grad_norm": 0.5056695342063904, "learning_rate": 9.187817612409886e-06, "loss": 0.3718, "step": 1713 }, { "epoch": 0.7970857231436986, "grad_norm": 0.45721235871315, "learning_rate": 9.18633863780862e-06, "loss": 0.3794, "step": 1714 }, { "epoch": 0.7975507673228956, "grad_norm": 0.487956702709198, "learning_rate": 9.184858437096766e-06, "loss": 0.3905, "step": 1715 }, { "epoch": 0.7980158115020927, "grad_norm": 0.5465155243873596, "learning_rate": 9.183377010707853e-06, "loss": 0.3968, "step": 1716 }, { "epoch": 0.7984808556812897, "grad_norm": 0.5411269068717957, "learning_rate": 9.181894359075763e-06, "loss": 0.3826, "step": 1717 }, { "epoch": 0.7989458998604867, "grad_norm": 0.511201798915863, "learning_rate": 9.180410482634744e-06, "loss": 0.3873, "step": 1718 }, { "epoch": 0.7994109440396838, "grad_norm": 0.5773457288742065, "learning_rate": 9.178925381819396e-06, "loss": 0.4048, "step": 1719 }, { "epoch": 0.7998759882188808, "grad_norm": 0.5756568312644958, "learning_rate": 9.177439057064684e-06, "loss": 0.3772, "step": 1720 }, { "epoch": 0.8003410323980779, "grad_norm": 0.5717048048973083, "learning_rate": 9.175951508805924e-06, "loss": 0.3796, "step": 1721 }, { "epoch": 0.8008060765772749, "grad_norm": 0.5515778064727783, "learning_rate": 9.174462737478801e-06, "loss": 0.3781, "step": 1722 }, { "epoch": 0.8012711207564719, "grad_norm": 0.6067471504211426, "learning_rate": 9.172972743519348e-06, "loss": 0.3922, "step": 1723 }, { "epoch": 0.8017361649356689, "grad_norm": 0.4658670127391815, "learning_rate": 9.17148152736396e-06, "loss": 0.3948, "step": 1724 }, { "epoch": 0.8022012091148659, "grad_norm": 0.5393667817115784, "learning_rate": 9.16998908944939e-06, "loss": 0.3883, "step": 1725 }, { "epoch": 0.8026662532940629, "grad_norm": 0.5518912076950073, "learning_rate": 9.168495430212752e-06, "loss": 0.4006, "step": 1726 }, { "epoch": 0.80313129747326, "grad_norm": 0.4799440801143646, "learning_rate": 9.16700055009151e-06, "loss": 0.3985, "step": 1727 }, { "epoch": 0.803596341652457, "grad_norm": 0.48832663893699646, "learning_rate": 9.165504449523492e-06, "loss": 0.3831, "step": 1728 }, { "epoch": 0.804061385831654, "grad_norm": 0.4758247435092926, "learning_rate": 9.164007128946881e-06, "loss": 0.3625, "step": 1729 }, { "epoch": 0.804526430010851, "grad_norm": 0.4202597439289093, "learning_rate": 9.162508588800221e-06, "loss": 0.3416, "step": 1730 }, { "epoch": 0.804991474190048, "grad_norm": 0.44846394658088684, "learning_rate": 9.161008829522406e-06, "loss": 0.3712, "step": 1731 }, { "epoch": 0.805456518369245, "grad_norm": 0.4517316222190857, "learning_rate": 9.159507851552693e-06, "loss": 0.3893, "step": 1732 }, { "epoch": 0.805921562548442, "grad_norm": 0.38894450664520264, "learning_rate": 9.158005655330694e-06, "loss": 0.3726, "step": 1733 }, { "epoch": 0.8063866067276392, "grad_norm": 0.45114031434059143, "learning_rate": 9.156502241296376e-06, "loss": 0.3862, "step": 1734 }, { "epoch": 0.8068516509068362, "grad_norm": 0.4183841943740845, "learning_rate": 9.154997609890068e-06, "loss": 0.3567, "step": 1735 }, { "epoch": 0.8073166950860332, "grad_norm": 0.4706588387489319, "learning_rate": 9.15349176155245e-06, "loss": 0.3892, "step": 1736 }, { "epoch": 0.8077817392652302, "grad_norm": 0.4675155282020569, "learning_rate": 9.151984696724563e-06, "loss": 0.4099, "step": 1737 }, { "epoch": 0.8082467834444272, "grad_norm": 0.46057257056236267, "learning_rate": 9.150476415847797e-06, "loss": 0.3915, "step": 1738 }, { "epoch": 0.8087118276236243, "grad_norm": 0.41681477427482605, "learning_rate": 9.148966919363906e-06, "loss": 0.3839, "step": 1739 }, { "epoch": 0.8091768718028213, "grad_norm": 0.4783117473125458, "learning_rate": 9.147456207714998e-06, "loss": 0.3541, "step": 1740 }, { "epoch": 0.8096419159820183, "grad_norm": 0.5050014853477478, "learning_rate": 9.145944281343534e-06, "loss": 0.36, "step": 1741 }, { "epoch": 0.8101069601612153, "grad_norm": 0.4405074715614319, "learning_rate": 9.144431140692332e-06, "loss": 0.4016, "step": 1742 }, { "epoch": 0.8105720043404123, "grad_norm": 0.41197729110717773, "learning_rate": 9.142916786204568e-06, "loss": 0.3787, "step": 1743 }, { "epoch": 0.8110370485196093, "grad_norm": 0.4868341386318207, "learning_rate": 9.141401218323772e-06, "loss": 0.3678, "step": 1744 }, { "epoch": 0.8115020926988064, "grad_norm": 0.4434308409690857, "learning_rate": 9.139884437493828e-06, "loss": 0.3633, "step": 1745 }, { "epoch": 0.8119671368780034, "grad_norm": 0.4891102612018585, "learning_rate": 9.138366444158977e-06, "loss": 0.3761, "step": 1746 }, { "epoch": 0.8124321810572004, "grad_norm": 0.5441362261772156, "learning_rate": 9.136847238763814e-06, "loss": 0.3794, "step": 1747 }, { "epoch": 0.8128972252363974, "grad_norm": 0.44566887617111206, "learning_rate": 9.135326821753291e-06, "loss": 0.3944, "step": 1748 }, { "epoch": 0.8133622694155945, "grad_norm": 0.4834020733833313, "learning_rate": 9.133805193572713e-06, "loss": 0.3902, "step": 1749 }, { "epoch": 0.8138273135947915, "grad_norm": 0.46305882930755615, "learning_rate": 9.132282354667741e-06, "loss": 0.3725, "step": 1750 }, { "epoch": 0.8142923577739886, "grad_norm": 0.4824235141277313, "learning_rate": 9.130758305484387e-06, "loss": 0.4076, "step": 1751 }, { "epoch": 0.8147574019531856, "grad_norm": 0.5479027032852173, "learning_rate": 9.129233046469021e-06, "loss": 0.3881, "step": 1752 }, { "epoch": 0.8152224461323826, "grad_norm": 0.42961257696151733, "learning_rate": 9.127706578068369e-06, "loss": 0.3726, "step": 1753 }, { "epoch": 0.8156874903115796, "grad_norm": 0.4997014105319977, "learning_rate": 9.126178900729507e-06, "loss": 0.3848, "step": 1754 }, { "epoch": 0.8161525344907766, "grad_norm": 0.5122492909431458, "learning_rate": 9.124650014899868e-06, "loss": 0.398, "step": 1755 }, { "epoch": 0.8166175786699736, "grad_norm": 0.5152801275253296, "learning_rate": 9.123119921027234e-06, "loss": 0.383, "step": 1756 }, { "epoch": 0.8170826228491707, "grad_norm": 0.4216311275959015, "learning_rate": 9.121588619559752e-06, "loss": 0.3729, "step": 1757 }, { "epoch": 0.8175476670283677, "grad_norm": 0.5480169057846069, "learning_rate": 9.120056110945907e-06, "loss": 0.4093, "step": 1758 }, { "epoch": 0.8180127112075647, "grad_norm": 0.6168447732925415, "learning_rate": 9.118522395634552e-06, "loss": 0.3907, "step": 1759 }, { "epoch": 0.8184777553867617, "grad_norm": 0.45844393968582153, "learning_rate": 9.116987474074885e-06, "loss": 0.3886, "step": 1760 }, { "epoch": 0.8189427995659587, "grad_norm": 0.543725311756134, "learning_rate": 9.115451346716459e-06, "loss": 0.3968, "step": 1761 }, { "epoch": 0.8194078437451557, "grad_norm": 0.48329514265060425, "learning_rate": 9.113914014009182e-06, "loss": 0.3837, "step": 1762 }, { "epoch": 0.8198728879243529, "grad_norm": 0.4946017265319824, "learning_rate": 9.112375476403313e-06, "loss": 0.3877, "step": 1763 }, { "epoch": 0.8203379321035499, "grad_norm": 0.5260226130485535, "learning_rate": 9.110835734349464e-06, "loss": 0.3735, "step": 1764 }, { "epoch": 0.8208029762827469, "grad_norm": 0.49095699191093445, "learning_rate": 9.109294788298601e-06, "loss": 0.3832, "step": 1765 }, { "epoch": 0.8212680204619439, "grad_norm": 0.5328912138938904, "learning_rate": 9.107752638702046e-06, "loss": 0.3704, "step": 1766 }, { "epoch": 0.8217330646411409, "grad_norm": 0.5146880745887756, "learning_rate": 9.106209286011463e-06, "loss": 0.3679, "step": 1767 }, { "epoch": 0.822198108820338, "grad_norm": 0.6965378522872925, "learning_rate": 9.104664730678878e-06, "loss": 0.3939, "step": 1768 }, { "epoch": 0.822663152999535, "grad_norm": 0.4589329659938812, "learning_rate": 9.103118973156667e-06, "loss": 0.3813, "step": 1769 }, { "epoch": 0.823128197178732, "grad_norm": 0.4879869222640991, "learning_rate": 9.101572013897555e-06, "loss": 0.3608, "step": 1770 }, { "epoch": 0.823593241357929, "grad_norm": 0.5170464515686035, "learning_rate": 9.100023853354624e-06, "loss": 0.3932, "step": 1771 }, { "epoch": 0.824058285537126, "grad_norm": 0.48912927508354187, "learning_rate": 9.098474491981305e-06, "loss": 0.3588, "step": 1772 }, { "epoch": 0.824523329716323, "grad_norm": 0.4621887803077698, "learning_rate": 9.096923930231377e-06, "loss": 0.3501, "step": 1773 }, { "epoch": 0.82498837389552, "grad_norm": 0.6245998740196228, "learning_rate": 9.095372168558977e-06, "loss": 0.3937, "step": 1774 }, { "epoch": 0.8254534180747171, "grad_norm": 0.43584007024765015, "learning_rate": 9.09381920741859e-06, "loss": 0.3947, "step": 1775 }, { "epoch": 0.8259184622539141, "grad_norm": 0.5115625262260437, "learning_rate": 9.092265047265057e-06, "loss": 0.3878, "step": 1776 }, { "epoch": 0.8263835064331111, "grad_norm": 0.5149562954902649, "learning_rate": 9.090709688553561e-06, "loss": 0.4063, "step": 1777 }, { "epoch": 0.8268485506123082, "grad_norm": 0.4879224896430969, "learning_rate": 9.089153131739642e-06, "loss": 0.3646, "step": 1778 }, { "epoch": 0.8273135947915052, "grad_norm": 0.4819486737251282, "learning_rate": 9.087595377279192e-06, "loss": 0.3915, "step": 1779 }, { "epoch": 0.8277786389707023, "grad_norm": 0.5894208550453186, "learning_rate": 9.086036425628453e-06, "loss": 0.395, "step": 1780 }, { "epoch": 0.8282436831498993, "grad_norm": 0.45492544770240784, "learning_rate": 9.084476277244013e-06, "loss": 0.3976, "step": 1781 }, { "epoch": 0.8287087273290963, "grad_norm": 0.4946926236152649, "learning_rate": 9.082914932582818e-06, "loss": 0.3795, "step": 1782 }, { "epoch": 0.8291737715082933, "grad_norm": 0.5285559892654419, "learning_rate": 9.081352392102159e-06, "loss": 0.3766, "step": 1783 }, { "epoch": 0.8296388156874903, "grad_norm": 0.45280373096466064, "learning_rate": 9.079788656259677e-06, "loss": 0.3862, "step": 1784 }, { "epoch": 0.8301038598666873, "grad_norm": 0.5158466100692749, "learning_rate": 9.078223725513366e-06, "loss": 0.3877, "step": 1785 }, { "epoch": 0.8305689040458843, "grad_norm": 0.5172114968299866, "learning_rate": 9.076657600321569e-06, "loss": 0.395, "step": 1786 }, { "epoch": 0.8310339482250814, "grad_norm": 0.4534607529640198, "learning_rate": 9.07509028114298e-06, "loss": 0.3724, "step": 1787 }, { "epoch": 0.8314989924042784, "grad_norm": 0.45312753319740295, "learning_rate": 9.073521768436638e-06, "loss": 0.3771, "step": 1788 }, { "epoch": 0.8319640365834754, "grad_norm": 0.4360067844390869, "learning_rate": 9.071952062661938e-06, "loss": 0.3927, "step": 1789 }, { "epoch": 0.8324290807626724, "grad_norm": 0.4952300190925598, "learning_rate": 9.070381164278622e-06, "loss": 0.3784, "step": 1790 }, { "epoch": 0.8328941249418694, "grad_norm": 0.4036773145198822, "learning_rate": 9.068809073746776e-06, "loss": 0.3878, "step": 1791 }, { "epoch": 0.8333591691210666, "grad_norm": 0.4290243089199066, "learning_rate": 9.067235791526844e-06, "loss": 0.3467, "step": 1792 }, { "epoch": 0.8338242133002636, "grad_norm": 0.49693888425827026, "learning_rate": 9.065661318079613e-06, "loss": 0.3583, "step": 1793 }, { "epoch": 0.8342892574794606, "grad_norm": 0.5083548426628113, "learning_rate": 9.064085653866222e-06, "loss": 0.3691, "step": 1794 }, { "epoch": 0.8347543016586576, "grad_norm": 0.47161993384361267, "learning_rate": 9.062508799348155e-06, "loss": 0.3803, "step": 1795 }, { "epoch": 0.8352193458378546, "grad_norm": 0.4640964865684509, "learning_rate": 9.06093075498725e-06, "loss": 0.3636, "step": 1796 }, { "epoch": 0.8356843900170516, "grad_norm": 0.4891905188560486, "learning_rate": 9.059351521245688e-06, "loss": 0.3904, "step": 1797 }, { "epoch": 0.8361494341962487, "grad_norm": 0.5055262446403503, "learning_rate": 9.057771098586003e-06, "loss": 0.4009, "step": 1798 }, { "epoch": 0.8366144783754457, "grad_norm": 0.44761812686920166, "learning_rate": 9.056189487471074e-06, "loss": 0.3927, "step": 1799 }, { "epoch": 0.8370795225546427, "grad_norm": 0.49880966544151306, "learning_rate": 9.05460668836413e-06, "loss": 0.4054, "step": 1800 }, { "epoch": 0.8375445667338397, "grad_norm": 0.427062451839447, "learning_rate": 9.053022701728744e-06, "loss": 0.3694, "step": 1801 }, { "epoch": 0.8380096109130367, "grad_norm": 0.48414304852485657, "learning_rate": 9.051437528028846e-06, "loss": 0.3839, "step": 1802 }, { "epoch": 0.8384746550922337, "grad_norm": 0.5623500347137451, "learning_rate": 9.049851167728702e-06, "loss": 0.405, "step": 1803 }, { "epoch": 0.8389396992714307, "grad_norm": 0.4517265558242798, "learning_rate": 9.048263621292934e-06, "loss": 0.381, "step": 1804 }, { "epoch": 0.8394047434506278, "grad_norm": 0.6233772039413452, "learning_rate": 9.046674889186509e-06, "loss": 0.4002, "step": 1805 }, { "epoch": 0.8398697876298248, "grad_norm": 0.5412980914115906, "learning_rate": 9.045084971874738e-06, "loss": 0.3837, "step": 1806 }, { "epoch": 0.8403348318090219, "grad_norm": 0.6208202838897705, "learning_rate": 9.043493869823283e-06, "loss": 0.4094, "step": 1807 }, { "epoch": 0.8407998759882189, "grad_norm": 0.5141086578369141, "learning_rate": 9.041901583498156e-06, "loss": 0.4012, "step": 1808 }, { "epoch": 0.8412649201674159, "grad_norm": 0.579380452632904, "learning_rate": 9.040308113365706e-06, "loss": 0.4096, "step": 1809 }, { "epoch": 0.841729964346613, "grad_norm": 0.5328314304351807, "learning_rate": 9.038713459892637e-06, "loss": 0.3613, "step": 1810 }, { "epoch": 0.84219500852581, "grad_norm": 0.4614507853984833, "learning_rate": 9.037117623545998e-06, "loss": 0.4189, "step": 1811 }, { "epoch": 0.842660052705007, "grad_norm": 0.6282251477241516, "learning_rate": 9.035520604793183e-06, "loss": 0.389, "step": 1812 }, { "epoch": 0.843125096884204, "grad_norm": 0.4729471206665039, "learning_rate": 9.03392240410193e-06, "loss": 0.3953, "step": 1813 }, { "epoch": 0.843590141063401, "grad_norm": 0.5759819149971008, "learning_rate": 9.03232302194033e-06, "loss": 0.3916, "step": 1814 }, { "epoch": 0.844055185242598, "grad_norm": 0.5700781345367432, "learning_rate": 9.030722458776815e-06, "loss": 0.4019, "step": 1815 }, { "epoch": 0.844520229421795, "grad_norm": 0.429929256439209, "learning_rate": 9.029120715080162e-06, "loss": 0.3942, "step": 1816 }, { "epoch": 0.8449852736009921, "grad_norm": 0.6056925654411316, "learning_rate": 9.027517791319499e-06, "loss": 0.3985, "step": 1817 }, { "epoch": 0.8454503177801891, "grad_norm": 0.5345470905303955, "learning_rate": 9.025913687964293e-06, "loss": 0.3734, "step": 1818 }, { "epoch": 0.8459153619593861, "grad_norm": 0.5462255477905273, "learning_rate": 9.024308405484363e-06, "loss": 0.3822, "step": 1819 }, { "epoch": 0.8463804061385831, "grad_norm": 0.5566556453704834, "learning_rate": 9.022701944349867e-06, "loss": 0.3878, "step": 1820 }, { "epoch": 0.8468454503177801, "grad_norm": 0.5047350525856018, "learning_rate": 9.021094305031314e-06, "loss": 0.3838, "step": 1821 }, { "epoch": 0.8473104944969773, "grad_norm": 0.5499956011772156, "learning_rate": 9.019485487999553e-06, "loss": 0.4, "step": 1822 }, { "epoch": 0.8477755386761743, "grad_norm": 0.49966034293174744, "learning_rate": 9.017875493725783e-06, "loss": 0.3779, "step": 1823 }, { "epoch": 0.8482405828553713, "grad_norm": 0.5475969910621643, "learning_rate": 9.016264322681543e-06, "loss": 0.3703, "step": 1824 }, { "epoch": 0.8487056270345683, "grad_norm": 0.5151212811470032, "learning_rate": 9.01465197533872e-06, "loss": 0.3803, "step": 1825 }, { "epoch": 0.8491706712137653, "grad_norm": 0.5074899196624756, "learning_rate": 9.013038452169544e-06, "loss": 0.3724, "step": 1826 }, { "epoch": 0.8496357153929623, "grad_norm": 0.49190855026245117, "learning_rate": 9.01142375364659e-06, "loss": 0.4028, "step": 1827 }, { "epoch": 0.8501007595721594, "grad_norm": 0.48248571157455444, "learning_rate": 9.009807880242777e-06, "loss": 0.3861, "step": 1828 }, { "epoch": 0.8505658037513564, "grad_norm": 0.5088501572608948, "learning_rate": 9.008190832431367e-06, "loss": 0.3925, "step": 1829 }, { "epoch": 0.8510308479305534, "grad_norm": 0.5075740218162537, "learning_rate": 9.006572610685969e-06, "loss": 0.4187, "step": 1830 }, { "epoch": 0.8514958921097504, "grad_norm": 0.6194763779640198, "learning_rate": 9.004953215480532e-06, "loss": 0.3997, "step": 1831 }, { "epoch": 0.8519609362889474, "grad_norm": 0.526176929473877, "learning_rate": 9.003332647289351e-06, "loss": 0.3969, "step": 1832 }, { "epoch": 0.8524259804681444, "grad_norm": 0.578385055065155, "learning_rate": 9.001710906587064e-06, "loss": 0.3834, "step": 1833 }, { "epoch": 0.8528910246473415, "grad_norm": 0.47598031163215637, "learning_rate": 9.000087993848655e-06, "loss": 0.3935, "step": 1834 }, { "epoch": 0.8533560688265385, "grad_norm": 0.4297819137573242, "learning_rate": 8.998463909549445e-06, "loss": 0.3873, "step": 1835 }, { "epoch": 0.8538211130057356, "grad_norm": 0.49305081367492676, "learning_rate": 8.996838654165103e-06, "loss": 0.3863, "step": 1836 }, { "epoch": 0.8542861571849326, "grad_norm": 0.5345546007156372, "learning_rate": 8.99521222817164e-06, "loss": 0.3716, "step": 1837 }, { "epoch": 0.8547512013641296, "grad_norm": 0.43835559487342834, "learning_rate": 8.993584632045412e-06, "loss": 0.3649, "step": 1838 }, { "epoch": 0.8552162455433266, "grad_norm": 0.5586456656455994, "learning_rate": 8.991955866263112e-06, "loss": 0.3804, "step": 1839 }, { "epoch": 0.8556812897225237, "grad_norm": 0.5465202331542969, "learning_rate": 8.990325931301783e-06, "loss": 0.3674, "step": 1840 }, { "epoch": 0.8561463339017207, "grad_norm": 0.40788722038269043, "learning_rate": 8.988694827638803e-06, "loss": 0.3849, "step": 1841 }, { "epoch": 0.8566113780809177, "grad_norm": 0.47962188720703125, "learning_rate": 8.987062555751896e-06, "loss": 0.3805, "step": 1842 }, { "epoch": 0.8570764222601147, "grad_norm": 0.4847969114780426, "learning_rate": 8.985429116119132e-06, "loss": 0.3694, "step": 1843 }, { "epoch": 0.8575414664393117, "grad_norm": 0.4671127200126648, "learning_rate": 8.983794509218912e-06, "loss": 0.3918, "step": 1844 }, { "epoch": 0.8580065106185087, "grad_norm": 0.473143070936203, "learning_rate": 8.982158735529991e-06, "loss": 0.382, "step": 1845 }, { "epoch": 0.8584715547977058, "grad_norm": 0.4718664586544037, "learning_rate": 8.980521795531461e-06, "loss": 0.3806, "step": 1846 }, { "epoch": 0.8589365989769028, "grad_norm": 0.4485674500465393, "learning_rate": 8.97888368970275e-06, "loss": 0.3918, "step": 1847 }, { "epoch": 0.8594016431560998, "grad_norm": 0.4322403073310852, "learning_rate": 8.977244418523638e-06, "loss": 0.3745, "step": 1848 }, { "epoch": 0.8598666873352968, "grad_norm": 0.44763296842575073, "learning_rate": 8.97560398247424e-06, "loss": 0.3607, "step": 1849 }, { "epoch": 0.8603317315144938, "grad_norm": 0.5826919674873352, "learning_rate": 8.97396238203501e-06, "loss": 0.395, "step": 1850 }, { "epoch": 0.860796775693691, "grad_norm": 0.42212069034576416, "learning_rate": 8.97231961768675e-06, "loss": 0.3603, "step": 1851 }, { "epoch": 0.861261819872888, "grad_norm": 0.49117767810821533, "learning_rate": 8.970675689910596e-06, "loss": 0.3789, "step": 1852 }, { "epoch": 0.861726864052085, "grad_norm": 0.5760953426361084, "learning_rate": 8.969030599188027e-06, "loss": 0.3962, "step": 1853 }, { "epoch": 0.862191908231282, "grad_norm": 0.4554479420185089, "learning_rate": 8.967384346000866e-06, "loss": 0.3854, "step": 1854 }, { "epoch": 0.862656952410479, "grad_norm": 0.4693755507469177, "learning_rate": 8.965736930831272e-06, "loss": 0.383, "step": 1855 }, { "epoch": 0.863121996589676, "grad_norm": 0.487821102142334, "learning_rate": 8.964088354161748e-06, "loss": 0.3819, "step": 1856 }, { "epoch": 0.863587040768873, "grad_norm": 0.46892300248146057, "learning_rate": 8.962438616475136e-06, "loss": 0.3768, "step": 1857 }, { "epoch": 0.8640520849480701, "grad_norm": 0.44840800762176514, "learning_rate": 8.960787718254615e-06, "loss": 0.3779, "step": 1858 }, { "epoch": 0.8645171291272671, "grad_norm": 0.4931188225746155, "learning_rate": 8.959135659983706e-06, "loss": 0.3753, "step": 1859 }, { "epoch": 0.8649821733064641, "grad_norm": 0.4526351988315582, "learning_rate": 8.957482442146271e-06, "loss": 0.3664, "step": 1860 }, { "epoch": 0.8654472174856611, "grad_norm": 0.46007290482521057, "learning_rate": 8.955828065226512e-06, "loss": 0.3647, "step": 1861 }, { "epoch": 0.8659122616648581, "grad_norm": 0.4510989487171173, "learning_rate": 8.954172529708967e-06, "loss": 0.3839, "step": 1862 }, { "epoch": 0.8663773058440551, "grad_norm": 0.46228083968162537, "learning_rate": 8.952515836078516e-06, "loss": 0.3681, "step": 1863 }, { "epoch": 0.8668423500232522, "grad_norm": 0.5067192912101746, "learning_rate": 8.950857984820378e-06, "loss": 0.3969, "step": 1864 }, { "epoch": 0.8673073942024492, "grad_norm": 0.5053953528404236, "learning_rate": 8.94919897642011e-06, "loss": 0.3643, "step": 1865 }, { "epoch": 0.8677724383816463, "grad_norm": 0.5040225386619568, "learning_rate": 8.947538811363612e-06, "loss": 0.4229, "step": 1866 }, { "epoch": 0.8682374825608433, "grad_norm": 0.5020115971565247, "learning_rate": 8.945877490137113e-06, "loss": 0.3804, "step": 1867 }, { "epoch": 0.8687025267400403, "grad_norm": 0.518023669719696, "learning_rate": 8.944215013227193e-06, "loss": 0.4007, "step": 1868 }, { "epoch": 0.8691675709192374, "grad_norm": 0.4483552873134613, "learning_rate": 8.942551381120763e-06, "loss": 0.3953, "step": 1869 }, { "epoch": 0.8696326150984344, "grad_norm": 0.5058504939079285, "learning_rate": 8.94088659430507e-06, "loss": 0.3782, "step": 1870 }, { "epoch": 0.8700976592776314, "grad_norm": 0.46518880128860474, "learning_rate": 8.939220653267708e-06, "loss": 0.3827, "step": 1871 }, { "epoch": 0.8705627034568284, "grad_norm": 0.4913824200630188, "learning_rate": 8.937553558496602e-06, "loss": 0.3815, "step": 1872 }, { "epoch": 0.8710277476360254, "grad_norm": 0.5250646471977234, "learning_rate": 8.935885310480018e-06, "loss": 0.3706, "step": 1873 }, { "epoch": 0.8714927918152224, "grad_norm": 0.44793933629989624, "learning_rate": 8.934215909706554e-06, "loss": 0.3856, "step": 1874 }, { "epoch": 0.8719578359944195, "grad_norm": 0.5750781297683716, "learning_rate": 8.932545356665157e-06, "loss": 0.3723, "step": 1875 }, { "epoch": 0.8724228801736165, "grad_norm": 0.46956464648246765, "learning_rate": 8.930873651845101e-06, "loss": 0.3936, "step": 1876 }, { "epoch": 0.8728879243528135, "grad_norm": 0.4354320168495178, "learning_rate": 8.929200795736003e-06, "loss": 0.3627, "step": 1877 }, { "epoch": 0.8733529685320105, "grad_norm": 0.5778939127922058, "learning_rate": 8.927526788827814e-06, "loss": 0.3856, "step": 1878 }, { "epoch": 0.8738180127112075, "grad_norm": 0.4760781526565552, "learning_rate": 8.925851631610825e-06, "loss": 0.3778, "step": 1879 }, { "epoch": 0.8742830568904046, "grad_norm": 0.48736944794654846, "learning_rate": 8.92417532457566e-06, "loss": 0.3975, "step": 1880 }, { "epoch": 0.8747481010696017, "grad_norm": 0.4641439914703369, "learning_rate": 8.922497868213284e-06, "loss": 0.3756, "step": 1881 }, { "epoch": 0.8752131452487987, "grad_norm": 0.5082623362541199, "learning_rate": 8.920819263014995e-06, "loss": 0.3836, "step": 1882 }, { "epoch": 0.8756781894279957, "grad_norm": 0.5371342301368713, "learning_rate": 8.919139509472433e-06, "loss": 0.3863, "step": 1883 }, { "epoch": 0.8761432336071927, "grad_norm": 0.44863003492355347, "learning_rate": 8.917458608077566e-06, "loss": 0.3654, "step": 1884 }, { "epoch": 0.8766082777863897, "grad_norm": 0.48676854372024536, "learning_rate": 8.915776559322704e-06, "loss": 0.3949, "step": 1885 }, { "epoch": 0.8770733219655867, "grad_norm": 0.4752778708934784, "learning_rate": 8.914093363700493e-06, "loss": 0.3897, "step": 1886 }, { "epoch": 0.8775383661447838, "grad_norm": 0.4733431935310364, "learning_rate": 8.912409021703914e-06, "loss": 0.3728, "step": 1887 }, { "epoch": 0.8780034103239808, "grad_norm": 0.44890302419662476, "learning_rate": 8.910723533826281e-06, "loss": 0.3891, "step": 1888 }, { "epoch": 0.8784684545031778, "grad_norm": 0.574249804019928, "learning_rate": 8.909036900561248e-06, "loss": 0.3874, "step": 1889 }, { "epoch": 0.8789334986823748, "grad_norm": 0.5202791094779968, "learning_rate": 8.907349122402803e-06, "loss": 0.3842, "step": 1890 }, { "epoch": 0.8793985428615718, "grad_norm": 0.46931275725364685, "learning_rate": 8.905660199845265e-06, "loss": 0.3718, "step": 1891 }, { "epoch": 0.8798635870407688, "grad_norm": 0.5120000243186951, "learning_rate": 8.903970133383297e-06, "loss": 0.3927, "step": 1892 }, { "epoch": 0.8803286312199659, "grad_norm": 0.44593966007232666, "learning_rate": 8.902278923511888e-06, "loss": 0.388, "step": 1893 }, { "epoch": 0.8807936753991629, "grad_norm": 0.4737493097782135, "learning_rate": 8.900586570726369e-06, "loss": 0.3728, "step": 1894 }, { "epoch": 0.88125871957836, "grad_norm": 0.4651558995246887, "learning_rate": 8.8988930755224e-06, "loss": 0.3921, "step": 1895 }, { "epoch": 0.881723763757557, "grad_norm": 0.5006027817726135, "learning_rate": 8.897198438395983e-06, "loss": 0.3782, "step": 1896 }, { "epoch": 0.882188807936754, "grad_norm": 0.444982647895813, "learning_rate": 8.895502659843442e-06, "loss": 0.3813, "step": 1897 }, { "epoch": 0.882653852115951, "grad_norm": 0.4869557023048401, "learning_rate": 8.89380574036145e-06, "loss": 0.4084, "step": 1898 }, { "epoch": 0.8831188962951481, "grad_norm": 0.496622771024704, "learning_rate": 8.892107680447005e-06, "loss": 0.392, "step": 1899 }, { "epoch": 0.8835839404743451, "grad_norm": 0.49721208214759827, "learning_rate": 8.890408480597437e-06, "loss": 0.3591, "step": 1900 }, { "epoch": 0.8840489846535421, "grad_norm": 0.5105478763580322, "learning_rate": 8.88870814131042e-06, "loss": 0.387, "step": 1901 }, { "epoch": 0.8845140288327391, "grad_norm": 0.5221246480941772, "learning_rate": 8.887006663083952e-06, "loss": 0.373, "step": 1902 }, { "epoch": 0.8849790730119361, "grad_norm": 0.4854063391685486, "learning_rate": 8.885304046416369e-06, "loss": 0.3778, "step": 1903 }, { "epoch": 0.8854441171911331, "grad_norm": 0.4377305209636688, "learning_rate": 8.883600291806344e-06, "loss": 0.4064, "step": 1904 }, { "epoch": 0.8859091613703302, "grad_norm": 0.4939655065536499, "learning_rate": 8.881895399752873e-06, "loss": 0.3839, "step": 1905 }, { "epoch": 0.8863742055495272, "grad_norm": 0.5449526906013489, "learning_rate": 8.880189370755293e-06, "loss": 0.3848, "step": 1906 }, { "epoch": 0.8868392497287242, "grad_norm": 0.46097972989082336, "learning_rate": 8.878482205313275e-06, "loss": 0.3829, "step": 1907 }, { "epoch": 0.8873042939079212, "grad_norm": 0.5087864995002747, "learning_rate": 8.876773903926816e-06, "loss": 0.3827, "step": 1908 }, { "epoch": 0.8877693380871183, "grad_norm": 0.5424960255622864, "learning_rate": 8.875064467096252e-06, "loss": 0.37, "step": 1909 }, { "epoch": 0.8882343822663153, "grad_norm": 0.4373340904712677, "learning_rate": 8.873353895322248e-06, "loss": 0.3723, "step": 1910 }, { "epoch": 0.8886994264455124, "grad_norm": 0.5504875183105469, "learning_rate": 8.871642189105804e-06, "loss": 0.4028, "step": 1911 }, { "epoch": 0.8891644706247094, "grad_norm": 0.5143681168556213, "learning_rate": 8.869929348948252e-06, "loss": 0.3809, "step": 1912 }, { "epoch": 0.8896295148039064, "grad_norm": 0.4348513185977936, "learning_rate": 8.868215375351251e-06, "loss": 0.3806, "step": 1913 }, { "epoch": 0.8900945589831034, "grad_norm": 0.5529665350914001, "learning_rate": 8.866500268816803e-06, "loss": 0.3994, "step": 1914 }, { "epoch": 0.8905596031623004, "grad_norm": 0.5090189576148987, "learning_rate": 8.864784029847227e-06, "loss": 0.3922, "step": 1915 }, { "epoch": 0.8910246473414974, "grad_norm": 0.4400188624858856, "learning_rate": 8.863066658945185e-06, "loss": 0.3955, "step": 1916 }, { "epoch": 0.8914896915206945, "grad_norm": 0.5315966606140137, "learning_rate": 8.861348156613667e-06, "loss": 0.3723, "step": 1917 }, { "epoch": 0.8919547356998915, "grad_norm": 0.5052797198295593, "learning_rate": 8.859628523355995e-06, "loss": 0.3741, "step": 1918 }, { "epoch": 0.8924197798790885, "grad_norm": 0.4738040864467621, "learning_rate": 8.857907759675822e-06, "loss": 0.368, "step": 1919 }, { "epoch": 0.8928848240582855, "grad_norm": 0.5382641553878784, "learning_rate": 8.85618586607713e-06, "loss": 0.3906, "step": 1920 }, { "epoch": 0.8933498682374825, "grad_norm": 0.5306910276412964, "learning_rate": 8.854462843064233e-06, "loss": 0.3685, "step": 1921 }, { "epoch": 0.8938149124166795, "grad_norm": 0.4780616760253906, "learning_rate": 8.85273869114178e-06, "loss": 0.3719, "step": 1922 }, { "epoch": 0.8942799565958766, "grad_norm": 0.5241276621818542, "learning_rate": 8.851013410814745e-06, "loss": 0.3668, "step": 1923 }, { "epoch": 0.8947450007750737, "grad_norm": 0.547930896282196, "learning_rate": 8.84928700258843e-06, "loss": 0.3678, "step": 1924 }, { "epoch": 0.8952100449542707, "grad_norm": 0.5266174077987671, "learning_rate": 8.847559466968482e-06, "loss": 0.3808, "step": 1925 }, { "epoch": 0.8956750891334677, "grad_norm": 0.4367474317550659, "learning_rate": 8.845830804460861e-06, "loss": 0.3975, "step": 1926 }, { "epoch": 0.8961401333126647, "grad_norm": 0.4753339886665344, "learning_rate": 8.844101015571867e-06, "loss": 0.3894, "step": 1927 }, { "epoch": 0.8966051774918617, "grad_norm": 0.5285556316375732, "learning_rate": 8.842370100808123e-06, "loss": 0.3891, "step": 1928 }, { "epoch": 0.8970702216710588, "grad_norm": 0.48880597949028015, "learning_rate": 8.84063806067659e-06, "loss": 0.3949, "step": 1929 }, { "epoch": 0.8975352658502558, "grad_norm": 0.46564510464668274, "learning_rate": 8.838904895684555e-06, "loss": 0.4002, "step": 1930 }, { "epoch": 0.8980003100294528, "grad_norm": 0.4695758521556854, "learning_rate": 8.837170606339628e-06, "loss": 0.3763, "step": 1931 }, { "epoch": 0.8984653542086498, "grad_norm": 0.5208839178085327, "learning_rate": 8.835435193149762e-06, "loss": 0.3677, "step": 1932 }, { "epoch": 0.8989303983878468, "grad_norm": 0.5239862203598022, "learning_rate": 8.833698656623227e-06, "loss": 0.3733, "step": 1933 }, { "epoch": 0.8993954425670438, "grad_norm": 0.4721670150756836, "learning_rate": 8.831960997268625e-06, "loss": 0.3773, "step": 1934 }, { "epoch": 0.8998604867462409, "grad_norm": 0.5257996320724487, "learning_rate": 8.83022221559489e-06, "loss": 0.3535, "step": 1935 }, { "epoch": 0.9003255309254379, "grad_norm": 0.5932273268699646, "learning_rate": 8.828482312111285e-06, "loss": 0.3875, "step": 1936 }, { "epoch": 0.9007905751046349, "grad_norm": 0.4747033417224884, "learning_rate": 8.826741287327396e-06, "loss": 0.3726, "step": 1937 }, { "epoch": 0.9012556192838319, "grad_norm": 0.4879438877105713, "learning_rate": 8.824999141753144e-06, "loss": 0.3816, "step": 1938 }, { "epoch": 0.901720663463029, "grad_norm": 0.5382803678512573, "learning_rate": 8.82325587589877e-06, "loss": 0.4059, "step": 1939 }, { "epoch": 0.902185707642226, "grad_norm": 0.5141168236732483, "learning_rate": 8.821511490274854e-06, "loss": 0.3807, "step": 1940 }, { "epoch": 0.9026507518214231, "grad_norm": 0.5135001540184021, "learning_rate": 8.819765985392297e-06, "loss": 0.4041, "step": 1941 }, { "epoch": 0.9031157960006201, "grad_norm": 0.5456048250198364, "learning_rate": 8.818019361762325e-06, "loss": 0.4146, "step": 1942 }, { "epoch": 0.9035808401798171, "grad_norm": 0.5352010726928711, "learning_rate": 8.816271619896502e-06, "loss": 0.385, "step": 1943 }, { "epoch": 0.9040458843590141, "grad_norm": 0.43585771322250366, "learning_rate": 8.814522760306708e-06, "loss": 0.3692, "step": 1944 }, { "epoch": 0.9045109285382111, "grad_norm": 0.47126439213752747, "learning_rate": 8.812772783505158e-06, "loss": 0.3653, "step": 1945 }, { "epoch": 0.9049759727174082, "grad_norm": 0.5107633471488953, "learning_rate": 8.811021690004389e-06, "loss": 0.3952, "step": 1946 }, { "epoch": 0.9054410168966052, "grad_norm": 0.482185423374176, "learning_rate": 8.80926948031727e-06, "loss": 0.3861, "step": 1947 }, { "epoch": 0.9059060610758022, "grad_norm": 0.4702143669128418, "learning_rate": 8.807516154956997e-06, "loss": 0.3701, "step": 1948 }, { "epoch": 0.9063711052549992, "grad_norm": 0.5154238343238831, "learning_rate": 8.80576171443709e-06, "loss": 0.3809, "step": 1949 }, { "epoch": 0.9068361494341962, "grad_norm": 0.49696359038352966, "learning_rate": 8.80400615927139e-06, "loss": 0.3743, "step": 1950 }, { "epoch": 0.9073011936133932, "grad_norm": 0.49918052554130554, "learning_rate": 8.802249489974078e-06, "loss": 0.4045, "step": 1951 }, { "epoch": 0.9077662377925902, "grad_norm": 0.45246899127960205, "learning_rate": 8.80049170705965e-06, "loss": 0.3693, "step": 1952 }, { "epoch": 0.9082312819717874, "grad_norm": 0.4851575791835785, "learning_rate": 8.798732811042934e-06, "loss": 0.4069, "step": 1953 }, { "epoch": 0.9086963261509844, "grad_norm": 0.5256077647209167, "learning_rate": 8.796972802439079e-06, "loss": 0.3695, "step": 1954 }, { "epoch": 0.9091613703301814, "grad_norm": 0.4606427848339081, "learning_rate": 8.795211681763565e-06, "loss": 0.3749, "step": 1955 }, { "epoch": 0.9096264145093784, "grad_norm": 0.4951748847961426, "learning_rate": 8.793449449532198e-06, "loss": 0.4129, "step": 1956 }, { "epoch": 0.9100914586885754, "grad_norm": 0.4839325547218323, "learning_rate": 8.791686106261104e-06, "loss": 0.3883, "step": 1957 }, { "epoch": 0.9105565028677725, "grad_norm": 0.4781914949417114, "learning_rate": 8.789921652466738e-06, "loss": 0.3883, "step": 1958 }, { "epoch": 0.9110215470469695, "grad_norm": 0.4980541467666626, "learning_rate": 8.78815608866588e-06, "loss": 0.3689, "step": 1959 }, { "epoch": 0.9114865912261665, "grad_norm": 0.523271381855011, "learning_rate": 8.786389415375636e-06, "loss": 0.3942, "step": 1960 }, { "epoch": 0.9119516354053635, "grad_norm": 0.5033456683158875, "learning_rate": 8.784621633113434e-06, "loss": 0.3624, "step": 1961 }, { "epoch": 0.9124166795845605, "grad_norm": 0.441829651594162, "learning_rate": 8.78285274239703e-06, "loss": 0.371, "step": 1962 }, { "epoch": 0.9128817237637575, "grad_norm": 0.49837011098861694, "learning_rate": 8.781082743744505e-06, "loss": 0.4078, "step": 1963 }, { "epoch": 0.9133467679429546, "grad_norm": 0.46190333366394043, "learning_rate": 8.779311637674259e-06, "loss": 0.397, "step": 1964 }, { "epoch": 0.9138118121221516, "grad_norm": 0.5549447536468506, "learning_rate": 8.777539424705022e-06, "loss": 0.3938, "step": 1965 }, { "epoch": 0.9142768563013486, "grad_norm": 0.4479234516620636, "learning_rate": 8.775766105355849e-06, "loss": 0.3767, "step": 1966 }, { "epoch": 0.9147419004805456, "grad_norm": 0.4744507372379303, "learning_rate": 8.773991680146113e-06, "loss": 0.3853, "step": 1967 }, { "epoch": 0.9152069446597427, "grad_norm": 0.4301864206790924, "learning_rate": 8.772216149595515e-06, "loss": 0.3836, "step": 1968 }, { "epoch": 0.9156719888389397, "grad_norm": 0.4343145489692688, "learning_rate": 8.77043951422408e-06, "loss": 0.3789, "step": 1969 }, { "epoch": 0.9161370330181368, "grad_norm": 0.43467792868614197, "learning_rate": 8.768661774552155e-06, "loss": 0.3736, "step": 1970 }, { "epoch": 0.9166020771973338, "grad_norm": 0.43103089928627014, "learning_rate": 8.766882931100411e-06, "loss": 0.3627, "step": 1971 }, { "epoch": 0.9170671213765308, "grad_norm": 0.43862855434417725, "learning_rate": 8.765102984389842e-06, "loss": 0.3811, "step": 1972 }, { "epoch": 0.9175321655557278, "grad_norm": 0.48002341389656067, "learning_rate": 8.763321934941766e-06, "loss": 0.4119, "step": 1973 }, { "epoch": 0.9179972097349248, "grad_norm": 0.43228980898857117, "learning_rate": 8.761539783277825e-06, "loss": 0.3808, "step": 1974 }, { "epoch": 0.9184622539141218, "grad_norm": 0.43084922432899475, "learning_rate": 8.75975652991998e-06, "loss": 0.3998, "step": 1975 }, { "epoch": 0.9189272980933189, "grad_norm": 0.4248911738395691, "learning_rate": 8.757972175390516e-06, "loss": 0.4031, "step": 1976 }, { "epoch": 0.9193923422725159, "grad_norm": 0.4406249523162842, "learning_rate": 8.756186720212045e-06, "loss": 0.3819, "step": 1977 }, { "epoch": 0.9198573864517129, "grad_norm": 0.4917864501476288, "learning_rate": 8.754400164907496e-06, "loss": 0.3958, "step": 1978 }, { "epoch": 0.9203224306309099, "grad_norm": 0.44568273425102234, "learning_rate": 8.752612510000123e-06, "loss": 0.3707, "step": 1979 }, { "epoch": 0.9207874748101069, "grad_norm": 0.5774033069610596, "learning_rate": 8.750823756013498e-06, "loss": 0.3855, "step": 1980 }, { "epoch": 0.9212525189893039, "grad_norm": 0.4670921564102173, "learning_rate": 8.749033903471522e-06, "loss": 0.4012, "step": 1981 }, { "epoch": 0.921717563168501, "grad_norm": 0.45798271894454956, "learning_rate": 8.74724295289841e-06, "loss": 0.3566, "step": 1982 }, { "epoch": 0.9221826073476981, "grad_norm": 0.47769033908843994, "learning_rate": 8.745450904818705e-06, "loss": 0.3623, "step": 1983 }, { "epoch": 0.9226476515268951, "grad_norm": 0.42745545506477356, "learning_rate": 8.743657759757267e-06, "loss": 0.3977, "step": 1984 }, { "epoch": 0.9231126957060921, "grad_norm": 0.4700676500797272, "learning_rate": 8.741863518239283e-06, "loss": 0.3815, "step": 1985 }, { "epoch": 0.9235777398852891, "grad_norm": 0.4442138671875, "learning_rate": 8.740068180790252e-06, "loss": 0.3957, "step": 1986 }, { "epoch": 0.9240427840644861, "grad_norm": 0.42371588945388794, "learning_rate": 8.738271747936001e-06, "loss": 0.3715, "step": 1987 }, { "epoch": 0.9245078282436832, "grad_norm": 0.476406067609787, "learning_rate": 8.736474220202675e-06, "loss": 0.3946, "step": 1988 }, { "epoch": 0.9249728724228802, "grad_norm": 0.4362446367740631, "learning_rate": 8.734675598116743e-06, "loss": 0.3665, "step": 1989 }, { "epoch": 0.9254379166020772, "grad_norm": 0.4169759154319763, "learning_rate": 8.732875882204993e-06, "loss": 0.3747, "step": 1990 }, { "epoch": 0.9259029607812742, "grad_norm": 0.4496033191680908, "learning_rate": 8.73107507299453e-06, "loss": 0.3798, "step": 1991 }, { "epoch": 0.9263680049604712, "grad_norm": 0.47536760568618774, "learning_rate": 8.729273171012782e-06, "loss": 0.3914, "step": 1992 }, { "epoch": 0.9268330491396682, "grad_norm": 0.4569942355155945, "learning_rate": 8.727470176787498e-06, "loss": 0.3665, "step": 1993 }, { "epoch": 0.9272980933188653, "grad_norm": 0.46827104687690735, "learning_rate": 8.725666090846746e-06, "loss": 0.3673, "step": 1994 }, { "epoch": 0.9277631374980623, "grad_norm": 0.5109233856201172, "learning_rate": 8.72386091371891e-06, "loss": 0.3717, "step": 1995 }, { "epoch": 0.9282281816772593, "grad_norm": 0.4976223409175873, "learning_rate": 8.7220546459327e-06, "loss": 0.3748, "step": 1996 }, { "epoch": 0.9286932258564564, "grad_norm": 0.42419055104255676, "learning_rate": 8.720247288017143e-06, "loss": 0.3478, "step": 1997 }, { "epoch": 0.9291582700356534, "grad_norm": 0.5530095100402832, "learning_rate": 8.718438840501585e-06, "loss": 0.3746, "step": 1998 }, { "epoch": 0.9296233142148504, "grad_norm": 0.44324877858161926, "learning_rate": 8.716629303915689e-06, "loss": 0.3711, "step": 1999 }, { "epoch": 0.9300883583940475, "grad_norm": 0.4611654281616211, "learning_rate": 8.71481867878944e-06, "loss": 0.3642, "step": 2000 }, { "epoch": 0.9305534025732445, "grad_norm": 0.5441577434539795, "learning_rate": 8.71300696565314e-06, "loss": 0.373, "step": 2001 }, { "epoch": 0.9310184467524415, "grad_norm": 0.5150545239448547, "learning_rate": 8.71119416503741e-06, "loss": 0.366, "step": 2002 }, { "epoch": 0.9314834909316385, "grad_norm": 0.5382184386253357, "learning_rate": 8.709380277473191e-06, "loss": 0.3885, "step": 2003 }, { "epoch": 0.9319485351108355, "grad_norm": 0.5589922070503235, "learning_rate": 8.707565303491741e-06, "loss": 0.3871, "step": 2004 }, { "epoch": 0.9324135792900325, "grad_norm": 0.5470889210700989, "learning_rate": 8.705749243624635e-06, "loss": 0.4045, "step": 2005 }, { "epoch": 0.9328786234692296, "grad_norm": 0.5791128873825073, "learning_rate": 8.70393209840377e-06, "loss": 0.3931, "step": 2006 }, { "epoch": 0.9333436676484266, "grad_norm": 0.4637211859226227, "learning_rate": 8.702113868361357e-06, "loss": 0.3823, "step": 2007 }, { "epoch": 0.9338087118276236, "grad_norm": 0.5710762143135071, "learning_rate": 8.700294554029926e-06, "loss": 0.381, "step": 2008 }, { "epoch": 0.9342737560068206, "grad_norm": 0.454738050699234, "learning_rate": 8.698474155942325e-06, "loss": 0.3765, "step": 2009 }, { "epoch": 0.9347388001860176, "grad_norm": 0.5094452500343323, "learning_rate": 8.696652674631716e-06, "loss": 0.4076, "step": 2010 }, { "epoch": 0.9352038443652146, "grad_norm": 0.46923717856407166, "learning_rate": 8.694830110631587e-06, "loss": 0.3796, "step": 2011 }, { "epoch": 0.9356688885444118, "grad_norm": 0.5491566061973572, "learning_rate": 8.693006464475732e-06, "loss": 0.3716, "step": 2012 }, { "epoch": 0.9361339327236088, "grad_norm": 0.5266678929328918, "learning_rate": 8.691181736698272e-06, "loss": 0.3616, "step": 2013 }, { "epoch": 0.9365989769028058, "grad_norm": 0.4861200153827667, "learning_rate": 8.689355927833636e-06, "loss": 0.3638, "step": 2014 }, { "epoch": 0.9370640210820028, "grad_norm": 0.5367874503135681, "learning_rate": 8.687529038416575e-06, "loss": 0.3787, "step": 2015 }, { "epoch": 0.9375290652611998, "grad_norm": 0.47685980796813965, "learning_rate": 8.685701068982158e-06, "loss": 0.3904, "step": 2016 }, { "epoch": 0.9379941094403969, "grad_norm": 0.4525419771671295, "learning_rate": 8.683872020065763e-06, "loss": 0.4035, "step": 2017 }, { "epoch": 0.9384591536195939, "grad_norm": 0.45574548840522766, "learning_rate": 8.682041892203093e-06, "loss": 0.4004, "step": 2018 }, { "epoch": 0.9389241977987909, "grad_norm": 0.4963834285736084, "learning_rate": 8.68021068593016e-06, "loss": 0.3706, "step": 2019 }, { "epoch": 0.9393892419779879, "grad_norm": 0.46662190556526184, "learning_rate": 8.678378401783293e-06, "loss": 0.4034, "step": 2020 }, { "epoch": 0.9398542861571849, "grad_norm": 0.45267191529273987, "learning_rate": 8.676545040299145e-06, "loss": 0.3866, "step": 2021 }, { "epoch": 0.9403193303363819, "grad_norm": 0.4719865620136261, "learning_rate": 8.674710602014672e-06, "loss": 0.3831, "step": 2022 }, { "epoch": 0.940784374515579, "grad_norm": 0.42862021923065186, "learning_rate": 8.67287508746715e-06, "loss": 0.3466, "step": 2023 }, { "epoch": 0.941249418694776, "grad_norm": 0.48866480588912964, "learning_rate": 8.671038497194175e-06, "loss": 0.3776, "step": 2024 }, { "epoch": 0.941714462873973, "grad_norm": 0.4378620684146881, "learning_rate": 8.669200831733655e-06, "loss": 0.3899, "step": 2025 }, { "epoch": 0.9421795070531701, "grad_norm": 0.45231664180755615, "learning_rate": 8.66736209162381e-06, "loss": 0.3469, "step": 2026 }, { "epoch": 0.9426445512323671, "grad_norm": 0.560242235660553, "learning_rate": 8.665522277403177e-06, "loss": 0.3633, "step": 2027 }, { "epoch": 0.9431095954115641, "grad_norm": 0.431618332862854, "learning_rate": 8.66368138961061e-06, "loss": 0.3988, "step": 2028 }, { "epoch": 0.9435746395907612, "grad_norm": 0.46893802285194397, "learning_rate": 8.661839428785273e-06, "loss": 0.3829, "step": 2029 }, { "epoch": 0.9440396837699582, "grad_norm": 0.506652295589447, "learning_rate": 8.659996395466648e-06, "loss": 0.3853, "step": 2030 }, { "epoch": 0.9445047279491552, "grad_norm": 0.5342637896537781, "learning_rate": 8.658152290194526e-06, "loss": 0.4173, "step": 2031 }, { "epoch": 0.9449697721283522, "grad_norm": 0.43193793296813965, "learning_rate": 8.656307113509021e-06, "loss": 0.367, "step": 2032 }, { "epoch": 0.9454348163075492, "grad_norm": 0.43902087211608887, "learning_rate": 8.654460865950551e-06, "loss": 0.3629, "step": 2033 }, { "epoch": 0.9458998604867462, "grad_norm": 0.5017895698547363, "learning_rate": 8.652613548059854e-06, "loss": 0.3706, "step": 2034 }, { "epoch": 0.9463649046659433, "grad_norm": 0.45130717754364014, "learning_rate": 8.650765160377978e-06, "loss": 0.3811, "step": 2035 }, { "epoch": 0.9468299488451403, "grad_norm": 0.4452032744884491, "learning_rate": 8.648915703446287e-06, "loss": 0.3904, "step": 2036 }, { "epoch": 0.9472949930243373, "grad_norm": 0.5498278141021729, "learning_rate": 8.647065177806457e-06, "loss": 0.3611, "step": 2037 }, { "epoch": 0.9477600372035343, "grad_norm": 0.41174963116645813, "learning_rate": 8.645213584000476e-06, "loss": 0.3793, "step": 2038 }, { "epoch": 0.9482250813827313, "grad_norm": 0.5462541580200195, "learning_rate": 8.643360922570646e-06, "loss": 0.3601, "step": 2039 }, { "epoch": 0.9486901255619283, "grad_norm": 0.5303115844726562, "learning_rate": 8.64150719405958e-06, "loss": 0.3918, "step": 2040 }, { "epoch": 0.9491551697411255, "grad_norm": 0.45511001348495483, "learning_rate": 8.639652399010208e-06, "loss": 0.4027, "step": 2041 }, { "epoch": 0.9496202139203225, "grad_norm": 0.5173806548118591, "learning_rate": 8.637796537965768e-06, "loss": 0.372, "step": 2042 }, { "epoch": 0.9500852580995195, "grad_norm": 0.45395195484161377, "learning_rate": 8.63593961146981e-06, "loss": 0.3794, "step": 2043 }, { "epoch": 0.9505503022787165, "grad_norm": 0.5278088450431824, "learning_rate": 8.634081620066199e-06, "loss": 0.388, "step": 2044 }, { "epoch": 0.9510153464579135, "grad_norm": 0.4131206274032593, "learning_rate": 8.632222564299111e-06, "loss": 0.3784, "step": 2045 }, { "epoch": 0.9514803906371105, "grad_norm": 0.4544680118560791, "learning_rate": 8.630362444713033e-06, "loss": 0.3816, "step": 2046 }, { "epoch": 0.9519454348163076, "grad_norm": 0.534386396408081, "learning_rate": 8.628501261852765e-06, "loss": 0.3543, "step": 2047 }, { "epoch": 0.9524104789955046, "grad_norm": 0.44800546765327454, "learning_rate": 8.626639016263413e-06, "loss": 0.3832, "step": 2048 }, { "epoch": 0.9528755231747016, "grad_norm": 0.5088453888893127, "learning_rate": 8.624775708490403e-06, "loss": 0.3755, "step": 2049 }, { "epoch": 0.9533405673538986, "grad_norm": 0.5941973328590393, "learning_rate": 8.622911339079464e-06, "loss": 0.3948, "step": 2050 }, { "epoch": 0.9538056115330956, "grad_norm": 0.45851653814315796, "learning_rate": 8.621045908576642e-06, "loss": 0.3968, "step": 2051 }, { "epoch": 0.9542706557122926, "grad_norm": 0.5592479109764099, "learning_rate": 8.619179417528293e-06, "loss": 0.3982, "step": 2052 }, { "epoch": 0.9547356998914897, "grad_norm": 0.5447689294815063, "learning_rate": 8.617311866481076e-06, "loss": 0.3877, "step": 2053 }, { "epoch": 0.9552007440706867, "grad_norm": 0.4597291052341461, "learning_rate": 8.61544325598197e-06, "loss": 0.3644, "step": 2054 }, { "epoch": 0.9556657882498837, "grad_norm": 0.5028977394104004, "learning_rate": 8.613573586578262e-06, "loss": 0.3604, "step": 2055 }, { "epoch": 0.9561308324290808, "grad_norm": 0.5437200665473938, "learning_rate": 8.611702858817545e-06, "loss": 0.3916, "step": 2056 }, { "epoch": 0.9565958766082778, "grad_norm": 0.4929006099700928, "learning_rate": 8.609831073247728e-06, "loss": 0.3633, "step": 2057 }, { "epoch": 0.9570609207874748, "grad_norm": 0.4636727273464203, "learning_rate": 8.607958230417024e-06, "loss": 0.3587, "step": 2058 }, { "epoch": 0.9575259649666719, "grad_norm": 0.39754369854927063, "learning_rate": 8.606084330873958e-06, "loss": 0.39, "step": 2059 }, { "epoch": 0.9579910091458689, "grad_norm": 0.47713038325309753, "learning_rate": 8.604209375167366e-06, "loss": 0.3848, "step": 2060 }, { "epoch": 0.9584560533250659, "grad_norm": 0.4594153165817261, "learning_rate": 8.602333363846393e-06, "loss": 0.3805, "step": 2061 }, { "epoch": 0.9589210975042629, "grad_norm": 0.4612623453140259, "learning_rate": 8.600456297460491e-06, "loss": 0.3809, "step": 2062 }, { "epoch": 0.9593861416834599, "grad_norm": 0.4625248312950134, "learning_rate": 8.598578176559423e-06, "loss": 0.3529, "step": 2063 }, { "epoch": 0.9598511858626569, "grad_norm": 0.4562045633792877, "learning_rate": 8.596699001693257e-06, "loss": 0.367, "step": 2064 }, { "epoch": 0.960316230041854, "grad_norm": 0.4903472065925598, "learning_rate": 8.594818773412376e-06, "loss": 0.3759, "step": 2065 }, { "epoch": 0.960781274221051, "grad_norm": 0.4430171251296997, "learning_rate": 8.592937492267466e-06, "loss": 0.3993, "step": 2066 }, { "epoch": 0.961246318400248, "grad_norm": 0.4540698826313019, "learning_rate": 8.591055158809526e-06, "loss": 0.3555, "step": 2067 }, { "epoch": 0.961711362579445, "grad_norm": 0.4455225169658661, "learning_rate": 8.589171773589861e-06, "loss": 0.3937, "step": 2068 }, { "epoch": 0.962176406758642, "grad_norm": 0.45158520340919495, "learning_rate": 8.587287337160083e-06, "loss": 0.3694, "step": 2069 }, { "epoch": 0.9626414509378391, "grad_norm": 0.44573891162872314, "learning_rate": 8.585401850072114e-06, "loss": 0.364, "step": 2070 }, { "epoch": 0.9631064951170362, "grad_norm": 0.44659656286239624, "learning_rate": 8.58351531287818e-06, "loss": 0.393, "step": 2071 }, { "epoch": 0.9635715392962332, "grad_norm": 0.4550364911556244, "learning_rate": 8.581627726130817e-06, "loss": 0.3919, "step": 2072 }, { "epoch": 0.9640365834754302, "grad_norm": 0.4669656455516815, "learning_rate": 8.579739090382873e-06, "loss": 0.4129, "step": 2073 }, { "epoch": 0.9645016276546272, "grad_norm": 0.45073822140693665, "learning_rate": 8.577849406187493e-06, "loss": 0.3963, "step": 2074 }, { "epoch": 0.9649666718338242, "grad_norm": 0.4639911949634552, "learning_rate": 8.575958674098138e-06, "loss": 0.3857, "step": 2075 }, { "epoch": 0.9654317160130212, "grad_norm": 0.4226040244102478, "learning_rate": 8.574066894668573e-06, "loss": 0.3762, "step": 2076 }, { "epoch": 0.9658967601922183, "grad_norm": 0.42542764544487, "learning_rate": 8.572174068452867e-06, "loss": 0.3502, "step": 2077 }, { "epoch": 0.9663618043714153, "grad_norm": 0.46741747856140137, "learning_rate": 8.570280196005403e-06, "loss": 0.3717, "step": 2078 }, { "epoch": 0.9668268485506123, "grad_norm": 0.4867299199104309, "learning_rate": 8.568385277880859e-06, "loss": 0.3835, "step": 2079 }, { "epoch": 0.9672918927298093, "grad_norm": 0.4860565662384033, "learning_rate": 8.56648931463423e-06, "loss": 0.3879, "step": 2080 }, { "epoch": 0.9677569369090063, "grad_norm": 0.44712382555007935, "learning_rate": 8.564592306820813e-06, "loss": 0.3715, "step": 2081 }, { "epoch": 0.9682219810882033, "grad_norm": 0.4493529498577118, "learning_rate": 8.562694254996208e-06, "loss": 0.3594, "step": 2082 }, { "epoch": 0.9686870252674004, "grad_norm": 0.44625014066696167, "learning_rate": 8.560795159716327e-06, "loss": 0.3816, "step": 2083 }, { "epoch": 0.9691520694465974, "grad_norm": 0.44804733991622925, "learning_rate": 8.55889502153738e-06, "loss": 0.3942, "step": 2084 }, { "epoch": 0.9696171136257945, "grad_norm": 0.43751975893974304, "learning_rate": 8.55699384101589e-06, "loss": 0.3754, "step": 2085 }, { "epoch": 0.9700821578049915, "grad_norm": 0.5086488723754883, "learning_rate": 8.555091618708681e-06, "loss": 0.3861, "step": 2086 }, { "epoch": 0.9705472019841885, "grad_norm": 0.4142938554286957, "learning_rate": 8.553188355172882e-06, "loss": 0.382, "step": 2087 }, { "epoch": 0.9710122461633856, "grad_norm": 0.4268786311149597, "learning_rate": 8.551284050965929e-06, "loss": 0.3612, "step": 2088 }, { "epoch": 0.9714772903425826, "grad_norm": 0.40908634662628174, "learning_rate": 8.54937870664556e-06, "loss": 0.3853, "step": 2089 }, { "epoch": 0.9719423345217796, "grad_norm": 0.4633747935295105, "learning_rate": 8.547472322769825e-06, "loss": 0.3561, "step": 2090 }, { "epoch": 0.9724073787009766, "grad_norm": 0.45280638337135315, "learning_rate": 8.545564899897066e-06, "loss": 0.4023, "step": 2091 }, { "epoch": 0.9728724228801736, "grad_norm": 0.5002387166023254, "learning_rate": 8.54365643858594e-06, "loss": 0.3743, "step": 2092 }, { "epoch": 0.9733374670593706, "grad_norm": 0.49016910791397095, "learning_rate": 8.541746939395403e-06, "loss": 0.3708, "step": 2093 }, { "epoch": 0.9738025112385676, "grad_norm": 0.4835710823535919, "learning_rate": 8.539836402884715e-06, "loss": 0.3901, "step": 2094 }, { "epoch": 0.9742675554177647, "grad_norm": 0.5034793615341187, "learning_rate": 8.537924829613444e-06, "loss": 0.3716, "step": 2095 }, { "epoch": 0.9747325995969617, "grad_norm": 0.5456627011299133, "learning_rate": 8.536012220141458e-06, "loss": 0.3759, "step": 2096 }, { "epoch": 0.9751976437761587, "grad_norm": 0.4535893499851227, "learning_rate": 8.534098575028928e-06, "loss": 0.3818, "step": 2097 }, { "epoch": 0.9756626879553557, "grad_norm": 0.40453192591667175, "learning_rate": 8.53218389483633e-06, "loss": 0.3686, "step": 2098 }, { "epoch": 0.9761277321345527, "grad_norm": 0.5098088979721069, "learning_rate": 8.530268180124444e-06, "loss": 0.385, "step": 2099 }, { "epoch": 0.9765927763137499, "grad_norm": 0.47901231050491333, "learning_rate": 8.528351431454352e-06, "loss": 0.3932, "step": 2100 }, { "epoch": 0.9770578204929469, "grad_norm": 0.4342050850391388, "learning_rate": 8.526433649387435e-06, "loss": 0.3771, "step": 2101 }, { "epoch": 0.9775228646721439, "grad_norm": 0.424624502658844, "learning_rate": 8.524514834485382e-06, "loss": 0.3976, "step": 2102 }, { "epoch": 0.9779879088513409, "grad_norm": 0.4495830237865448, "learning_rate": 8.522594987310184e-06, "loss": 0.3901, "step": 2103 }, { "epoch": 0.9784529530305379, "grad_norm": 0.46972575783729553, "learning_rate": 8.520674108424134e-06, "loss": 0.4025, "step": 2104 }, { "epoch": 0.9789179972097349, "grad_norm": 0.46081069111824036, "learning_rate": 8.518752198389823e-06, "loss": 0.3794, "step": 2105 }, { "epoch": 0.979383041388932, "grad_norm": 0.47898101806640625, "learning_rate": 8.51682925777015e-06, "loss": 0.3701, "step": 2106 }, { "epoch": 0.979848085568129, "grad_norm": 0.4954424500465393, "learning_rate": 8.51490528712831e-06, "loss": 0.3656, "step": 2107 }, { "epoch": 0.980313129747326, "grad_norm": 0.5420515537261963, "learning_rate": 8.512980287027805e-06, "loss": 0.37, "step": 2108 }, { "epoch": 0.980778173926523, "grad_norm": 0.42437291145324707, "learning_rate": 8.511054258032436e-06, "loss": 0.395, "step": 2109 }, { "epoch": 0.98124321810572, "grad_norm": 0.4806736409664154, "learning_rate": 8.509127200706305e-06, "loss": 0.3452, "step": 2110 }, { "epoch": 0.981708262284917, "grad_norm": 0.4276758134365082, "learning_rate": 8.507199115613818e-06, "loss": 0.3351, "step": 2111 }, { "epoch": 0.982173306464114, "grad_norm": 0.484733909368515, "learning_rate": 8.505270003319676e-06, "loss": 0.3501, "step": 2112 }, { "epoch": 0.9826383506433111, "grad_norm": 0.5105139017105103, "learning_rate": 8.503339864388887e-06, "loss": 0.4193, "step": 2113 }, { "epoch": 0.9831033948225082, "grad_norm": 0.5405691266059875, "learning_rate": 8.501408699386758e-06, "loss": 0.3912, "step": 2114 }, { "epoch": 0.9835684390017052, "grad_norm": 0.438722163438797, "learning_rate": 8.499476508878894e-06, "loss": 0.3935, "step": 2115 }, { "epoch": 0.9840334831809022, "grad_norm": 0.5080367922782898, "learning_rate": 8.497543293431202e-06, "loss": 0.3764, "step": 2116 }, { "epoch": 0.9844985273600992, "grad_norm": 0.49082526564598083, "learning_rate": 8.495609053609893e-06, "loss": 0.3737, "step": 2117 }, { "epoch": 0.9849635715392963, "grad_norm": 0.4634253978729248, "learning_rate": 8.49367378998147e-06, "loss": 0.3979, "step": 2118 }, { "epoch": 0.9854286157184933, "grad_norm": 0.4806537926197052, "learning_rate": 8.491737503112744e-06, "loss": 0.3773, "step": 2119 }, { "epoch": 0.9858936598976903, "grad_norm": 0.5054664611816406, "learning_rate": 8.489800193570818e-06, "loss": 0.3704, "step": 2120 }, { "epoch": 0.9863587040768873, "grad_norm": 0.47105222940444946, "learning_rate": 8.487861861923103e-06, "loss": 0.3684, "step": 2121 }, { "epoch": 0.9868237482560843, "grad_norm": 0.4627211391925812, "learning_rate": 8.485922508737302e-06, "loss": 0.3776, "step": 2122 }, { "epoch": 0.9872887924352813, "grad_norm": 0.4847998321056366, "learning_rate": 8.483982134581419e-06, "loss": 0.3859, "step": 2123 }, { "epoch": 0.9877538366144784, "grad_norm": 0.5242019295692444, "learning_rate": 8.48204074002376e-06, "loss": 0.3859, "step": 2124 }, { "epoch": 0.9882188807936754, "grad_norm": 0.48214638233184814, "learning_rate": 8.480098325632928e-06, "loss": 0.364, "step": 2125 }, { "epoch": 0.9886839249728724, "grad_norm": 0.4689680337905884, "learning_rate": 8.478154891977825e-06, "loss": 0.3843, "step": 2126 }, { "epoch": 0.9891489691520694, "grad_norm": 0.609089195728302, "learning_rate": 8.47621043962765e-06, "loss": 0.4067, "step": 2127 }, { "epoch": 0.9896140133312664, "grad_norm": 0.4945160150527954, "learning_rate": 8.474264969151902e-06, "loss": 0.3718, "step": 2128 }, { "epoch": 0.9900790575104635, "grad_norm": 0.5477257370948792, "learning_rate": 8.472318481120377e-06, "loss": 0.3698, "step": 2129 }, { "epoch": 0.9905441016896606, "grad_norm": 0.5951197743415833, "learning_rate": 8.470370976103171e-06, "loss": 0.3752, "step": 2130 }, { "epoch": 0.9910091458688576, "grad_norm": 0.4970504343509674, "learning_rate": 8.468422454670674e-06, "loss": 0.3917, "step": 2131 }, { "epoch": 0.9914741900480546, "grad_norm": 0.48202067613601685, "learning_rate": 8.46647291739358e-06, "loss": 0.3855, "step": 2132 }, { "epoch": 0.9919392342272516, "grad_norm": 0.49319443106651306, "learning_rate": 8.464522364842874e-06, "loss": 0.357, "step": 2133 }, { "epoch": 0.9924042784064486, "grad_norm": 0.47641077637672424, "learning_rate": 8.462570797589842e-06, "loss": 0.3823, "step": 2134 }, { "epoch": 0.9928693225856456, "grad_norm": 0.4628462791442871, "learning_rate": 8.460618216206069e-06, "loss": 0.36, "step": 2135 }, { "epoch": 0.9933343667648427, "grad_norm": 0.4999821186065674, "learning_rate": 8.458664621263428e-06, "loss": 0.402, "step": 2136 }, { "epoch": 0.9937994109440397, "grad_norm": 0.4732115864753723, "learning_rate": 8.456710013334102e-06, "loss": 0.376, "step": 2137 }, { "epoch": 0.9942644551232367, "grad_norm": 0.42666521668434143, "learning_rate": 8.45475439299056e-06, "loss": 0.3759, "step": 2138 }, { "epoch": 0.9947294993024337, "grad_norm": 0.512298583984375, "learning_rate": 8.452797760805572e-06, "loss": 0.3646, "step": 2139 }, { "epoch": 0.9951945434816307, "grad_norm": 0.5457414984703064, "learning_rate": 8.450840117352203e-06, "loss": 0.3637, "step": 2140 }, { "epoch": 0.9956595876608277, "grad_norm": 0.4568066895008087, "learning_rate": 8.448881463203819e-06, "loss": 0.4164, "step": 2141 }, { "epoch": 0.9961246318400248, "grad_norm": 0.4378894865512848, "learning_rate": 8.446921798934074e-06, "loss": 0.3666, "step": 2142 }, { "epoch": 0.9965896760192218, "grad_norm": 0.5727525949478149, "learning_rate": 8.444961125116924e-06, "loss": 0.376, "step": 2143 }, { "epoch": 0.9970547201984189, "grad_norm": 0.5255966782569885, "learning_rate": 8.442999442326617e-06, "loss": 0.3902, "step": 2144 }, { "epoch": 0.9975197643776159, "grad_norm": 0.4558255076408386, "learning_rate": 8.441036751137697e-06, "loss": 0.3963, "step": 2145 }, { "epoch": 0.9979848085568129, "grad_norm": 0.49802669882774353, "learning_rate": 8.439073052125006e-06, "loss": 0.3579, "step": 2146 }, { "epoch": 0.99844985273601, "grad_norm": 0.5205224752426147, "learning_rate": 8.43710834586368e-06, "loss": 0.3696, "step": 2147 }, { "epoch": 0.998914896915207, "grad_norm": 0.4866522252559662, "learning_rate": 8.435142632929149e-06, "loss": 0.3896, "step": 2148 }, { "epoch": 0.999379941094404, "grad_norm": 0.4538208842277527, "learning_rate": 8.43317591389714e-06, "loss": 0.3607, "step": 2149 }, { "epoch": 0.999844985273601, "grad_norm": 0.5789209604263306, "learning_rate": 8.43120818934367e-06, "loss": 0.379, "step": 2150 }, { "epoch": 1.000310029452798, "grad_norm": 0.8949771523475647, "learning_rate": 8.429239459845053e-06, "loss": 0.5959, "step": 2151 }, { "epoch": 1.0007750736319951, "grad_norm": 0.4994892477989197, "learning_rate": 8.427269725977902e-06, "loss": 0.3726, "step": 2152 }, { "epoch": 1.001240117811192, "grad_norm": 0.49586233496665955, "learning_rate": 8.425298988319119e-06, "loss": 0.3367, "step": 2153 }, { "epoch": 1.0017051619903892, "grad_norm": 0.4944624900817871, "learning_rate": 8.423327247445898e-06, "loss": 0.3802, "step": 2154 }, { "epoch": 1.002170206169586, "grad_norm": 0.45939287543296814, "learning_rate": 8.421354503935733e-06, "loss": 0.3271, "step": 2155 }, { "epoch": 1.0026352503487832, "grad_norm": 0.5419012308120728, "learning_rate": 8.419380758366407e-06, "loss": 0.3778, "step": 2156 }, { "epoch": 1.00310029452798, "grad_norm": 0.43736547231674194, "learning_rate": 8.417406011316e-06, "loss": 0.3852, "step": 2157 }, { "epoch": 1.0035653387071772, "grad_norm": 0.5195603966712952, "learning_rate": 8.415430263362878e-06, "loss": 0.3837, "step": 2158 }, { "epoch": 1.0040303828863741, "grad_norm": 0.5915653109550476, "learning_rate": 8.413453515085712e-06, "loss": 0.3953, "step": 2159 }, { "epoch": 1.0044954270655713, "grad_norm": 0.4842241406440735, "learning_rate": 8.411475767063454e-06, "loss": 0.3372, "step": 2160 }, { "epoch": 1.0049604712447682, "grad_norm": 0.46653223037719727, "learning_rate": 8.409497019875362e-06, "loss": 0.3541, "step": 2161 }, { "epoch": 1.0054255154239653, "grad_norm": 0.4800226390361786, "learning_rate": 8.40751727410097e-06, "loss": 0.3579, "step": 2162 }, { "epoch": 1.0058905596031622, "grad_norm": 0.4519990086555481, "learning_rate": 8.405536530320118e-06, "loss": 0.3372, "step": 2163 }, { "epoch": 1.0063556037823593, "grad_norm": 0.4979685842990875, "learning_rate": 8.403554789112934e-06, "loss": 0.343, "step": 2164 }, { "epoch": 1.0068206479615562, "grad_norm": 0.4970398545265198, "learning_rate": 8.401572051059835e-06, "loss": 0.3701, "step": 2165 }, { "epoch": 1.0072856921407534, "grad_norm": 0.5320008993148804, "learning_rate": 8.399588316741535e-06, "loss": 0.3512, "step": 2166 }, { "epoch": 1.0077507363199505, "grad_norm": 0.4425172209739685, "learning_rate": 8.397603586739039e-06, "loss": 0.3165, "step": 2167 }, { "epoch": 1.0082157804991474, "grad_norm": 0.4745187759399414, "learning_rate": 8.395617861633637e-06, "loss": 0.3919, "step": 2168 }, { "epoch": 1.0086808246783445, "grad_norm": 0.5007448196411133, "learning_rate": 8.393631142006922e-06, "loss": 0.3564, "step": 2169 }, { "epoch": 1.0091458688575414, "grad_norm": 0.4433797001838684, "learning_rate": 8.391643428440766e-06, "loss": 0.367, "step": 2170 }, { "epoch": 1.0096109130367386, "grad_norm": 0.37245190143585205, "learning_rate": 8.389654721517341e-06, "loss": 0.3361, "step": 2171 }, { "epoch": 1.0100759572159355, "grad_norm": 0.5664768815040588, "learning_rate": 8.38766502181911e-06, "loss": 0.382, "step": 2172 }, { "epoch": 1.0105410013951326, "grad_norm": 0.4073793888092041, "learning_rate": 8.385674329928819e-06, "loss": 0.3291, "step": 2173 }, { "epoch": 1.0110060455743295, "grad_norm": 0.509306788444519, "learning_rate": 8.383682646429509e-06, "loss": 0.4038, "step": 2174 }, { "epoch": 1.0114710897535266, "grad_norm": 0.39777812361717224, "learning_rate": 8.381689971904514e-06, "loss": 0.3413, "step": 2175 }, { "epoch": 1.0119361339327235, "grad_norm": 0.5179665088653564, "learning_rate": 8.379696306937457e-06, "loss": 0.3944, "step": 2176 }, { "epoch": 1.0124011781119207, "grad_norm": 0.4474855661392212, "learning_rate": 8.377701652112249e-06, "loss": 0.3663, "step": 2177 }, { "epoch": 1.0128662222911176, "grad_norm": 0.4197114408016205, "learning_rate": 8.37570600801309e-06, "loss": 0.325, "step": 2178 }, { "epoch": 1.0133312664703147, "grad_norm": 0.48934638500213623, "learning_rate": 8.373709375224475e-06, "loss": 0.3751, "step": 2179 }, { "epoch": 1.0137963106495118, "grad_norm": 0.4234052002429962, "learning_rate": 8.371711754331181e-06, "loss": 0.3585, "step": 2180 }, { "epoch": 1.0142613548287087, "grad_norm": 0.511944055557251, "learning_rate": 8.369713145918284e-06, "loss": 0.3847, "step": 2181 }, { "epoch": 1.0147263990079058, "grad_norm": 0.46818193793296814, "learning_rate": 8.36771355057114e-06, "loss": 0.38, "step": 2182 }, { "epoch": 1.0151914431871027, "grad_norm": 0.41124582290649414, "learning_rate": 8.365712968875399e-06, "loss": 0.3481, "step": 2183 }, { "epoch": 1.0156564873662999, "grad_norm": 0.42587098479270935, "learning_rate": 8.363711401417e-06, "loss": 0.3544, "step": 2184 }, { "epoch": 1.0161215315454968, "grad_norm": 0.41804370284080505, "learning_rate": 8.36170884878217e-06, "loss": 0.3613, "step": 2185 }, { "epoch": 1.016586575724694, "grad_norm": 0.41786617040634155, "learning_rate": 8.359705311557421e-06, "loss": 0.3388, "step": 2186 }, { "epoch": 1.0170516199038908, "grad_norm": 0.4178462028503418, "learning_rate": 8.35770079032956e-06, "loss": 0.3509, "step": 2187 }, { "epoch": 1.017516664083088, "grad_norm": 0.3867464065551758, "learning_rate": 8.355695285685675e-06, "loss": 0.3443, "step": 2188 }, { "epoch": 1.0179817082622848, "grad_norm": 0.5491018891334534, "learning_rate": 8.35368879821315e-06, "loss": 0.3935, "step": 2189 }, { "epoch": 1.018446752441482, "grad_norm": 0.4546567499637604, "learning_rate": 8.35168132849965e-06, "loss": 0.3451, "step": 2190 }, { "epoch": 1.0189117966206789, "grad_norm": 0.426011323928833, "learning_rate": 8.349672877133131e-06, "loss": 0.315, "step": 2191 }, { "epoch": 1.019376840799876, "grad_norm": 0.43346622586250305, "learning_rate": 8.347663444701835e-06, "loss": 0.3507, "step": 2192 }, { "epoch": 1.019841884979073, "grad_norm": 0.4720572233200073, "learning_rate": 8.345653031794292e-06, "loss": 0.3445, "step": 2193 }, { "epoch": 1.02030692915827, "grad_norm": 0.4907239079475403, "learning_rate": 8.34364163899932e-06, "loss": 0.336, "step": 2194 }, { "epoch": 1.0207719733374672, "grad_norm": 0.47017717361450195, "learning_rate": 8.341629266906024e-06, "loss": 0.4029, "step": 2195 }, { "epoch": 1.021237017516664, "grad_norm": 0.4325951933860779, "learning_rate": 8.339615916103795e-06, "loss": 0.3129, "step": 2196 }, { "epoch": 1.0217020616958612, "grad_norm": 0.4916847050189972, "learning_rate": 8.33760158718231e-06, "loss": 0.3647, "step": 2197 }, { "epoch": 1.022167105875058, "grad_norm": 0.43246665596961975, "learning_rate": 8.335586280731532e-06, "loss": 0.3216, "step": 2198 }, { "epoch": 1.0226321500542552, "grad_norm": 0.5200701355934143, "learning_rate": 8.333569997341713e-06, "loss": 0.355, "step": 2199 }, { "epoch": 1.0230971942334521, "grad_norm": 0.4282546639442444, "learning_rate": 8.33155273760339e-06, "loss": 0.3829, "step": 2200 }, { "epoch": 1.0235622384126493, "grad_norm": 0.42160195112228394, "learning_rate": 8.329534502107386e-06, "loss": 0.3202, "step": 2201 }, { "epoch": 1.0240272825918462, "grad_norm": 0.42160677909851074, "learning_rate": 8.327515291444807e-06, "loss": 0.3368, "step": 2202 }, { "epoch": 1.0244923267710433, "grad_norm": 0.4635396897792816, "learning_rate": 8.325495106207049e-06, "loss": 0.3864, "step": 2203 }, { "epoch": 1.0249573709502402, "grad_norm": 0.4313600957393646, "learning_rate": 8.32347394698579e-06, "loss": 0.3538, "step": 2204 }, { "epoch": 1.0254224151294373, "grad_norm": 0.4247414767742157, "learning_rate": 8.321451814372998e-06, "loss": 0.3499, "step": 2205 }, { "epoch": 1.0258874593086342, "grad_norm": 0.5032163262367249, "learning_rate": 8.319428708960917e-06, "loss": 0.3768, "step": 2206 }, { "epoch": 1.0263525034878314, "grad_norm": 0.44325828552246094, "learning_rate": 8.317404631342088e-06, "loss": 0.3473, "step": 2207 }, { "epoch": 1.0268175476670283, "grad_norm": 0.3861435353755951, "learning_rate": 8.315379582109326e-06, "loss": 0.3206, "step": 2208 }, { "epoch": 1.0272825918462254, "grad_norm": 0.4646325707435608, "learning_rate": 8.313353561855737e-06, "loss": 0.3673, "step": 2209 }, { "epoch": 1.0277476360254225, "grad_norm": 0.4774647653102875, "learning_rate": 8.31132657117471e-06, "loss": 0.3534, "step": 2210 }, { "epoch": 1.0282126802046194, "grad_norm": 0.4477936029434204, "learning_rate": 8.309298610659917e-06, "loss": 0.3531, "step": 2211 }, { "epoch": 1.0286777243838165, "grad_norm": 0.5256711840629578, "learning_rate": 8.307269680905312e-06, "loss": 0.3825, "step": 2212 }, { "epoch": 1.0291427685630135, "grad_norm": 0.43662089109420776, "learning_rate": 8.305239782505142e-06, "loss": 0.3622, "step": 2213 }, { "epoch": 1.0296078127422106, "grad_norm": 0.4843900501728058, "learning_rate": 8.303208916053924e-06, "loss": 0.3555, "step": 2214 }, { "epoch": 1.0300728569214075, "grad_norm": 0.5311547517776489, "learning_rate": 8.30117708214647e-06, "loss": 0.3647, "step": 2215 }, { "epoch": 1.0305379011006046, "grad_norm": 0.4405294954776764, "learning_rate": 8.299144281377869e-06, "loss": 0.308, "step": 2216 }, { "epoch": 1.0310029452798015, "grad_norm": 0.5497903823852539, "learning_rate": 8.297110514343498e-06, "loss": 0.3571, "step": 2217 }, { "epoch": 1.0314679894589986, "grad_norm": 0.44999319314956665, "learning_rate": 8.295075781639013e-06, "loss": 0.3713, "step": 2218 }, { "epoch": 1.0319330336381956, "grad_norm": 0.5399612188339233, "learning_rate": 8.293040083860352e-06, "loss": 0.3285, "step": 2219 }, { "epoch": 1.0323980778173927, "grad_norm": 0.5000502467155457, "learning_rate": 8.29100342160374e-06, "loss": 0.351, "step": 2220 }, { "epoch": 1.0328631219965896, "grad_norm": 0.5676979422569275, "learning_rate": 8.288965795465684e-06, "loss": 0.3698, "step": 2221 }, { "epoch": 1.0333281661757867, "grad_norm": 0.5297927260398865, "learning_rate": 8.28692720604297e-06, "loss": 0.3449, "step": 2222 }, { "epoch": 1.0337932103549836, "grad_norm": 0.5518194437026978, "learning_rate": 8.284887653932665e-06, "loss": 0.3581, "step": 2223 }, { "epoch": 1.0342582545341807, "grad_norm": 0.49941563606262207, "learning_rate": 8.282847139732125e-06, "loss": 0.3357, "step": 2224 }, { "epoch": 1.0347232987133779, "grad_norm": 0.5394763946533203, "learning_rate": 8.28080566403898e-06, "loss": 0.3671, "step": 2225 }, { "epoch": 1.0351883428925748, "grad_norm": 0.5620566010475159, "learning_rate": 8.278763227451148e-06, "loss": 0.3658, "step": 2226 }, { "epoch": 1.035653387071772, "grad_norm": 0.5896021723747253, "learning_rate": 8.276719830566823e-06, "loss": 0.3925, "step": 2227 }, { "epoch": 1.0361184312509688, "grad_norm": 0.5375012159347534, "learning_rate": 8.274675473984486e-06, "loss": 0.3573, "step": 2228 }, { "epoch": 1.036583475430166, "grad_norm": 0.48443183302879333, "learning_rate": 8.272630158302892e-06, "loss": 0.3385, "step": 2229 }, { "epoch": 1.0370485196093628, "grad_norm": 0.5011869072914124, "learning_rate": 8.270583884121083e-06, "loss": 0.356, "step": 2230 }, { "epoch": 1.03751356378856, "grad_norm": 0.5027603507041931, "learning_rate": 8.268536652038379e-06, "loss": 0.3276, "step": 2231 }, { "epoch": 1.0379786079677569, "grad_norm": 0.593014657497406, "learning_rate": 8.266488462654381e-06, "loss": 0.3687, "step": 2232 }, { "epoch": 1.038443652146954, "grad_norm": 0.6836045384407043, "learning_rate": 8.264439316568969e-06, "loss": 0.3779, "step": 2233 }, { "epoch": 1.038908696326151, "grad_norm": 0.47743064165115356, "learning_rate": 8.262389214382307e-06, "loss": 0.3386, "step": 2234 }, { "epoch": 1.039373740505348, "grad_norm": 0.7030071020126343, "learning_rate": 8.260338156694836e-06, "loss": 0.3694, "step": 2235 }, { "epoch": 1.039838784684545, "grad_norm": 0.468199223279953, "learning_rate": 8.258286144107277e-06, "loss": 0.3337, "step": 2236 }, { "epoch": 1.040303828863742, "grad_norm": 0.43109989166259766, "learning_rate": 8.256233177220632e-06, "loss": 0.3538, "step": 2237 }, { "epoch": 1.0407688730429392, "grad_norm": 0.5127283930778503, "learning_rate": 8.25417925663618e-06, "loss": 0.3447, "step": 2238 }, { "epoch": 1.041233917222136, "grad_norm": 0.5324409604072571, "learning_rate": 8.25212438295548e-06, "loss": 0.3476, "step": 2239 }, { "epoch": 1.0416989614013332, "grad_norm": 0.4684849679470062, "learning_rate": 8.250068556780376e-06, "loss": 0.348, "step": 2240 }, { "epoch": 1.0421640055805301, "grad_norm": 0.5129745006561279, "learning_rate": 8.24801177871298e-06, "loss": 0.3528, "step": 2241 }, { "epoch": 1.0426290497597273, "grad_norm": 0.5319026112556458, "learning_rate": 8.245954049355696e-06, "loss": 0.3404, "step": 2242 }, { "epoch": 1.0430940939389242, "grad_norm": 0.4605168104171753, "learning_rate": 8.243895369311192e-06, "loss": 0.3319, "step": 2243 }, { "epoch": 1.0435591381181213, "grad_norm": 0.4982357621192932, "learning_rate": 8.241835739182426e-06, "loss": 0.3557, "step": 2244 }, { "epoch": 1.0440241822973182, "grad_norm": 0.5314902067184448, "learning_rate": 8.239775159572632e-06, "loss": 0.352, "step": 2245 }, { "epoch": 1.0444892264765153, "grad_norm": 0.4709586799144745, "learning_rate": 8.237713631085316e-06, "loss": 0.3741, "step": 2246 }, { "epoch": 1.0449542706557122, "grad_norm": 0.4382578134536743, "learning_rate": 8.235651154324269e-06, "loss": 0.3414, "step": 2247 }, { "epoch": 1.0454193148349094, "grad_norm": 0.43601369857788086, "learning_rate": 8.233587729893555e-06, "loss": 0.3409, "step": 2248 }, { "epoch": 1.0458843590141063, "grad_norm": 0.4836721122264862, "learning_rate": 8.23152335839752e-06, "loss": 0.3506, "step": 2249 }, { "epoch": 1.0463494031933034, "grad_norm": 0.5360286235809326, "learning_rate": 8.229458040440783e-06, "loss": 0.3817, "step": 2250 }, { "epoch": 1.0468144473725003, "grad_norm": 0.4707900583744049, "learning_rate": 8.227391776628242e-06, "loss": 0.3439, "step": 2251 }, { "epoch": 1.0472794915516974, "grad_norm": 0.48144766688346863, "learning_rate": 8.225324567565071e-06, "loss": 0.3562, "step": 2252 }, { "epoch": 1.0477445357308945, "grad_norm": 0.5262990593910217, "learning_rate": 8.223256413856726e-06, "loss": 0.365, "step": 2253 }, { "epoch": 1.0482095799100914, "grad_norm": 0.5710676312446594, "learning_rate": 8.221187316108935e-06, "loss": 0.3493, "step": 2254 }, { "epoch": 1.0486746240892886, "grad_norm": 0.44150006771087646, "learning_rate": 8.219117274927696e-06, "loss": 0.3598, "step": 2255 }, { "epoch": 1.0491396682684855, "grad_norm": 0.43997588753700256, "learning_rate": 8.2170462909193e-06, "loss": 0.307, "step": 2256 }, { "epoch": 1.0496047124476826, "grad_norm": 0.582988440990448, "learning_rate": 8.2149743646903e-06, "loss": 0.3721, "step": 2257 }, { "epoch": 1.0500697566268795, "grad_norm": 0.4914950728416443, "learning_rate": 8.212901496847528e-06, "loss": 0.3651, "step": 2258 }, { "epoch": 1.0505348008060766, "grad_norm": 0.4819602966308594, "learning_rate": 8.210827687998098e-06, "loss": 0.4051, "step": 2259 }, { "epoch": 1.0509998449852735, "grad_norm": 0.47203314304351807, "learning_rate": 8.208752938749389e-06, "loss": 0.3389, "step": 2260 }, { "epoch": 1.0514648891644707, "grad_norm": 0.5789709687232971, "learning_rate": 8.206677249709066e-06, "loss": 0.3793, "step": 2261 }, { "epoch": 1.0519299333436676, "grad_norm": 0.5130416750907898, "learning_rate": 8.204600621485064e-06, "loss": 0.3352, "step": 2262 }, { "epoch": 1.0523949775228647, "grad_norm": 0.4451179802417755, "learning_rate": 8.202523054685592e-06, "loss": 0.3723, "step": 2263 }, { "epoch": 1.0528600217020616, "grad_norm": 0.5486533641815186, "learning_rate": 8.200444549919135e-06, "loss": 0.3537, "step": 2264 }, { "epoch": 1.0533250658812587, "grad_norm": 0.4916934072971344, "learning_rate": 8.198365107794457e-06, "loss": 0.3705, "step": 2265 }, { "epoch": 1.0537901100604556, "grad_norm": 0.46391212940216064, "learning_rate": 8.196284728920589e-06, "loss": 0.3469, "step": 2266 }, { "epoch": 1.0542551542396528, "grad_norm": 0.5530695915222168, "learning_rate": 8.194203413906843e-06, "loss": 0.3614, "step": 2267 }, { "epoch": 1.05472019841885, "grad_norm": 0.4474685490131378, "learning_rate": 8.1921211633628e-06, "loss": 0.3722, "step": 2268 }, { "epoch": 1.0551852425980468, "grad_norm": 0.4766155779361725, "learning_rate": 8.190037977898319e-06, "loss": 0.3416, "step": 2269 }, { "epoch": 1.055650286777244, "grad_norm": 0.541581392288208, "learning_rate": 8.187953858123529e-06, "loss": 0.3799, "step": 2270 }, { "epoch": 1.0561153309564408, "grad_norm": 0.5254855751991272, "learning_rate": 8.185868804648838e-06, "loss": 0.3685, "step": 2271 }, { "epoch": 1.056580375135638, "grad_norm": 0.4539804458618164, "learning_rate": 8.183782818084922e-06, "loss": 0.3398, "step": 2272 }, { "epoch": 1.0570454193148349, "grad_norm": 0.4451497197151184, "learning_rate": 8.181695899042733e-06, "loss": 0.35, "step": 2273 }, { "epoch": 1.057510463494032, "grad_norm": 0.4841856360435486, "learning_rate": 8.179608048133497e-06, "loss": 0.3849, "step": 2274 }, { "epoch": 1.057975507673229, "grad_norm": 0.3748435080051422, "learning_rate": 8.17751926596871e-06, "loss": 0.3153, "step": 2275 }, { "epoch": 1.058440551852426, "grad_norm": 0.4471512734889984, "learning_rate": 8.175429553160142e-06, "loss": 0.3684, "step": 2276 }, { "epoch": 1.058905596031623, "grad_norm": 0.47942283749580383, "learning_rate": 8.17333891031984e-06, "loss": 0.3909, "step": 2277 }, { "epoch": 1.05937064021082, "grad_norm": 0.3868976831436157, "learning_rate": 8.171247338060113e-06, "loss": 0.3071, "step": 2278 }, { "epoch": 1.059835684390017, "grad_norm": 0.42615851759910583, "learning_rate": 8.16915483699355e-06, "loss": 0.375, "step": 2279 }, { "epoch": 1.060300728569214, "grad_norm": 0.42659878730773926, "learning_rate": 8.167061407733018e-06, "loss": 0.3678, "step": 2280 }, { "epoch": 1.060765772748411, "grad_norm": 0.4303070306777954, "learning_rate": 8.164967050891639e-06, "loss": 0.3455, "step": 2281 }, { "epoch": 1.0612308169276081, "grad_norm": 0.4534488618373871, "learning_rate": 8.16287176708282e-06, "loss": 0.3615, "step": 2282 }, { "epoch": 1.0616958611068052, "grad_norm": 0.5386179685592651, "learning_rate": 8.160775556920236e-06, "loss": 0.3719, "step": 2283 }, { "epoch": 1.0621609052860022, "grad_norm": 0.48542487621307373, "learning_rate": 8.158678421017833e-06, "loss": 0.34, "step": 2284 }, { "epoch": 1.0626259494651993, "grad_norm": 0.43062224984169006, "learning_rate": 8.156580359989827e-06, "loss": 0.3546, "step": 2285 }, { "epoch": 1.0630909936443962, "grad_norm": 0.5932109355926514, "learning_rate": 8.154481374450707e-06, "loss": 0.3409, "step": 2286 }, { "epoch": 1.0635560378235933, "grad_norm": 0.5103998184204102, "learning_rate": 8.15238146501523e-06, "loss": 0.3665, "step": 2287 }, { "epoch": 1.0640210820027902, "grad_norm": 0.41876837611198425, "learning_rate": 8.150280632298426e-06, "loss": 0.313, "step": 2288 }, { "epoch": 1.0644861261819873, "grad_norm": 0.5669198036193848, "learning_rate": 8.148178876915598e-06, "loss": 0.3474, "step": 2289 }, { "epoch": 1.0649511703611843, "grad_norm": 0.5304418206214905, "learning_rate": 8.14607619948231e-06, "loss": 0.3581, "step": 2290 }, { "epoch": 1.0654162145403814, "grad_norm": 0.48082804679870605, "learning_rate": 8.143972600614407e-06, "loss": 0.3727, "step": 2291 }, { "epoch": 1.0658812587195783, "grad_norm": 0.5468645691871643, "learning_rate": 8.141868080927998e-06, "loss": 0.3748, "step": 2292 }, { "epoch": 1.0663463028987754, "grad_norm": 0.46219947934150696, "learning_rate": 8.13976264103946e-06, "loss": 0.392, "step": 2293 }, { "epoch": 1.0668113470779723, "grad_norm": 0.508678674697876, "learning_rate": 8.137656281565445e-06, "loss": 0.345, "step": 2294 }, { "epoch": 1.0672763912571694, "grad_norm": 0.5073127746582031, "learning_rate": 8.135549003122871e-06, "loss": 0.3621, "step": 2295 }, { "epoch": 1.0677414354363663, "grad_norm": 0.4401457905769348, "learning_rate": 8.133440806328925e-06, "loss": 0.3351, "step": 2296 }, { "epoch": 1.0682064796155635, "grad_norm": 0.4791904091835022, "learning_rate": 8.131331691801066e-06, "loss": 0.3336, "step": 2297 }, { "epoch": 1.0686715237947606, "grad_norm": 0.4278305470943451, "learning_rate": 8.129221660157014e-06, "loss": 0.3651, "step": 2298 }, { "epoch": 1.0691365679739575, "grad_norm": 0.47636422514915466, "learning_rate": 8.127110712014767e-06, "loss": 0.3022, "step": 2299 }, { "epoch": 1.0696016121531546, "grad_norm": 0.4815133512020111, "learning_rate": 8.124998847992587e-06, "loss": 0.335, "step": 2300 }, { "epoch": 1.0700666563323515, "grad_norm": 0.4794626235961914, "learning_rate": 8.122886068709003e-06, "loss": 0.374, "step": 2301 }, { "epoch": 1.0705317005115487, "grad_norm": 0.4852800667285919, "learning_rate": 8.120772374782818e-06, "loss": 0.3387, "step": 2302 }, { "epoch": 1.0709967446907456, "grad_norm": 0.4669995903968811, "learning_rate": 8.118657766833093e-06, "loss": 0.332, "step": 2303 }, { "epoch": 1.0714617888699427, "grad_norm": 0.4429568946361542, "learning_rate": 8.116542245479165e-06, "loss": 0.3507, "step": 2304 }, { "epoch": 1.0719268330491396, "grad_norm": 0.49815958738327026, "learning_rate": 8.114425811340635e-06, "loss": 0.3344, "step": 2305 }, { "epoch": 1.0723918772283367, "grad_norm": 0.41795018315315247, "learning_rate": 8.112308465037375e-06, "loss": 0.3721, "step": 2306 }, { "epoch": 1.0728569214075336, "grad_norm": 0.5189369916915894, "learning_rate": 8.110190207189519e-06, "loss": 0.3475, "step": 2307 }, { "epoch": 1.0733219655867308, "grad_norm": 0.5295676589012146, "learning_rate": 8.108071038417471e-06, "loss": 0.378, "step": 2308 }, { "epoch": 1.0737870097659277, "grad_norm": 0.4301398694515228, "learning_rate": 8.1059509593419e-06, "loss": 0.3511, "step": 2309 }, { "epoch": 1.0742520539451248, "grad_norm": 0.47263363003730774, "learning_rate": 8.103829970583742e-06, "loss": 0.3233, "step": 2310 }, { "epoch": 1.0747170981243217, "grad_norm": 0.519551157951355, "learning_rate": 8.101708072764204e-06, "loss": 0.3633, "step": 2311 }, { "epoch": 1.0751821423035188, "grad_norm": 0.40797778964042664, "learning_rate": 8.099585266504753e-06, "loss": 0.3504, "step": 2312 }, { "epoch": 1.075647186482716, "grad_norm": 0.4842034578323364, "learning_rate": 8.097461552427123e-06, "loss": 0.3747, "step": 2313 }, { "epoch": 1.0761122306619129, "grad_norm": 0.5274269580841064, "learning_rate": 8.095336931153318e-06, "loss": 0.3811, "step": 2314 }, { "epoch": 1.07657727484111, "grad_norm": 0.4279079735279083, "learning_rate": 8.093211403305603e-06, "loss": 0.339, "step": 2315 }, { "epoch": 1.077042319020307, "grad_norm": 0.4614260792732239, "learning_rate": 8.09108496950651e-06, "loss": 0.3576, "step": 2316 }, { "epoch": 1.077507363199504, "grad_norm": 0.5641934275627136, "learning_rate": 8.088957630378842e-06, "loss": 0.353, "step": 2317 }, { "epoch": 1.077972407378701, "grad_norm": 0.5266045928001404, "learning_rate": 8.086829386545655e-06, "loss": 0.3438, "step": 2318 }, { "epoch": 1.078437451557898, "grad_norm": 0.39795106649398804, "learning_rate": 8.084700238630283e-06, "loss": 0.2865, "step": 2319 }, { "epoch": 1.078902495737095, "grad_norm": 0.47763675451278687, "learning_rate": 8.082570187256315e-06, "loss": 0.3863, "step": 2320 }, { "epoch": 1.079367539916292, "grad_norm": 0.43691733479499817, "learning_rate": 8.080439233047612e-06, "loss": 0.3406, "step": 2321 }, { "epoch": 1.079832584095489, "grad_norm": 0.42221641540527344, "learning_rate": 8.078307376628292e-06, "loss": 0.3743, "step": 2322 }, { "epoch": 1.0802976282746861, "grad_norm": 0.4146184027194977, "learning_rate": 8.076174618622744e-06, "loss": 0.3584, "step": 2323 }, { "epoch": 1.080762672453883, "grad_norm": 0.4470728039741516, "learning_rate": 8.074040959655616e-06, "loss": 0.379, "step": 2324 }, { "epoch": 1.0812277166330801, "grad_norm": 0.512160062789917, "learning_rate": 8.071906400351823e-06, "loss": 0.3425, "step": 2325 }, { "epoch": 1.081692760812277, "grad_norm": 0.39240655303001404, "learning_rate": 8.069770941336542e-06, "loss": 0.35, "step": 2326 }, { "epoch": 1.0821578049914742, "grad_norm": 0.4476917088031769, "learning_rate": 8.067634583235215e-06, "loss": 0.3455, "step": 2327 }, { "epoch": 1.0826228491706713, "grad_norm": 0.5166687965393066, "learning_rate": 8.065497326673548e-06, "loss": 0.4265, "step": 2328 }, { "epoch": 1.0830878933498682, "grad_norm": 0.4078577160835266, "learning_rate": 8.063359172277507e-06, "loss": 0.3306, "step": 2329 }, { "epoch": 1.0835529375290653, "grad_norm": 0.4202713072299957, "learning_rate": 8.061220120673323e-06, "loss": 0.3573, "step": 2330 }, { "epoch": 1.0840179817082622, "grad_norm": 0.4990968108177185, "learning_rate": 8.05908017248749e-06, "loss": 0.3774, "step": 2331 }, { "epoch": 1.0844830258874594, "grad_norm": 0.4540191888809204, "learning_rate": 8.056939328346763e-06, "loss": 0.3521, "step": 2332 }, { "epoch": 1.0849480700666563, "grad_norm": 0.3652085065841675, "learning_rate": 8.05479758887816e-06, "loss": 0.2742, "step": 2333 }, { "epoch": 1.0854131142458534, "grad_norm": 0.5675728917121887, "learning_rate": 8.052654954708966e-06, "loss": 0.3901, "step": 2334 }, { "epoch": 1.0858781584250503, "grad_norm": 0.46835145354270935, "learning_rate": 8.050511426466717e-06, "loss": 0.3319, "step": 2335 }, { "epoch": 1.0863432026042474, "grad_norm": 0.4239199459552765, "learning_rate": 8.048367004779223e-06, "loss": 0.3336, "step": 2336 }, { "epoch": 1.0868082467834443, "grad_norm": 0.5129764080047607, "learning_rate": 8.046221690274547e-06, "loss": 0.3662, "step": 2337 }, { "epoch": 1.0872732909626415, "grad_norm": 0.46359536051750183, "learning_rate": 8.04407548358102e-06, "loss": 0.3276, "step": 2338 }, { "epoch": 1.0877383351418384, "grad_norm": 0.4142586290836334, "learning_rate": 8.041928385327229e-06, "loss": 0.3574, "step": 2339 }, { "epoch": 1.0882033793210355, "grad_norm": 0.4258030951023102, "learning_rate": 8.039780396142023e-06, "loss": 0.334, "step": 2340 }, { "epoch": 1.0886684235002324, "grad_norm": 0.43229323625564575, "learning_rate": 8.037631516654516e-06, "loss": 0.3329, "step": 2341 }, { "epoch": 1.0891334676794295, "grad_norm": 0.4798569977283478, "learning_rate": 8.035481747494078e-06, "loss": 0.3859, "step": 2342 }, { "epoch": 1.0895985118586267, "grad_norm": 0.4725603759288788, "learning_rate": 8.033331089290342e-06, "loss": 0.3613, "step": 2343 }, { "epoch": 1.0900635560378236, "grad_norm": 0.4565368890762329, "learning_rate": 8.0311795426732e-06, "loss": 0.3214, "step": 2344 }, { "epoch": 1.0905286002170207, "grad_norm": 0.436138778924942, "learning_rate": 8.029027108272806e-06, "loss": 0.3499, "step": 2345 }, { "epoch": 1.0909936443962176, "grad_norm": 0.4970560371875763, "learning_rate": 8.026873786719574e-06, "loss": 0.3763, "step": 2346 }, { "epoch": 1.0914586885754147, "grad_norm": 0.47304481267929077, "learning_rate": 8.024719578644176e-06, "loss": 0.3628, "step": 2347 }, { "epoch": 1.0919237327546116, "grad_norm": 0.4886552393436432, "learning_rate": 8.022564484677545e-06, "loss": 0.3415, "step": 2348 }, { "epoch": 1.0923887769338088, "grad_norm": 0.4249359965324402, "learning_rate": 8.020408505450869e-06, "loss": 0.3737, "step": 2349 }, { "epoch": 1.0928538211130057, "grad_norm": 0.43461641669273376, "learning_rate": 8.018251641595604e-06, "loss": 0.3466, "step": 2350 }, { "epoch": 1.0933188652922028, "grad_norm": 0.49044400453567505, "learning_rate": 8.016093893743462e-06, "loss": 0.3437, "step": 2351 }, { "epoch": 1.0937839094713997, "grad_norm": 0.5193617343902588, "learning_rate": 8.013935262526407e-06, "loss": 0.4029, "step": 2352 }, { "epoch": 1.0942489536505968, "grad_norm": 0.4232480823993683, "learning_rate": 8.01177574857667e-06, "loss": 0.3253, "step": 2353 }, { "epoch": 1.0947139978297937, "grad_norm": 0.5477461814880371, "learning_rate": 8.009615352526737e-06, "loss": 0.3584, "step": 2354 }, { "epoch": 1.0951790420089909, "grad_norm": 0.4962575137615204, "learning_rate": 8.007454075009352e-06, "loss": 0.387, "step": 2355 }, { "epoch": 1.0956440861881878, "grad_norm": 0.4006698727607727, "learning_rate": 8.00529191665752e-06, "loss": 0.3545, "step": 2356 }, { "epoch": 1.0961091303673849, "grad_norm": 0.44313761591911316, "learning_rate": 8.0031288781045e-06, "loss": 0.3291, "step": 2357 }, { "epoch": 1.096574174546582, "grad_norm": 0.525131344795227, "learning_rate": 8.000964959983815e-06, "loss": 0.3665, "step": 2358 }, { "epoch": 1.097039218725779, "grad_norm": 0.44491708278656006, "learning_rate": 7.998800162929236e-06, "loss": 0.3512, "step": 2359 }, { "epoch": 1.097504262904976, "grad_norm": 0.5135394334793091, "learning_rate": 7.9966344875748e-06, "loss": 0.3638, "step": 2360 }, { "epoch": 1.097969307084173, "grad_norm": 0.5230858325958252, "learning_rate": 7.994467934554794e-06, "loss": 0.3372, "step": 2361 }, { "epoch": 1.09843435126337, "grad_norm": 0.5185748934745789, "learning_rate": 7.992300504503774e-06, "loss": 0.395, "step": 2362 }, { "epoch": 1.098899395442567, "grad_norm": 0.440336138010025, "learning_rate": 7.990132198056538e-06, "loss": 0.3397, "step": 2363 }, { "epoch": 1.0993644396217641, "grad_norm": 0.539335310459137, "learning_rate": 7.987963015848152e-06, "loss": 0.3714, "step": 2364 }, { "epoch": 1.099829483800961, "grad_norm": 0.4946892559528351, "learning_rate": 7.985792958513932e-06, "loss": 0.3258, "step": 2365 }, { "epoch": 1.1002945279801581, "grad_norm": 0.4519208073616028, "learning_rate": 7.983622026689452e-06, "loss": 0.3841, "step": 2366 }, { "epoch": 1.100759572159355, "grad_norm": 0.4441494345664978, "learning_rate": 7.981450221010547e-06, "loss": 0.3603, "step": 2367 }, { "epoch": 1.1012246163385522, "grad_norm": 0.545825183391571, "learning_rate": 7.979277542113297e-06, "loss": 0.3385, "step": 2368 }, { "epoch": 1.1016896605177493, "grad_norm": 0.46227794885635376, "learning_rate": 7.97710399063405e-06, "loss": 0.3456, "step": 2369 }, { "epoch": 1.1021547046969462, "grad_norm": 0.405261754989624, "learning_rate": 7.974929567209399e-06, "loss": 0.3543, "step": 2370 }, { "epoch": 1.1026197488761433, "grad_norm": 0.5703674554824829, "learning_rate": 7.972754272476203e-06, "loss": 0.3407, "step": 2371 }, { "epoch": 1.1030847930553402, "grad_norm": 0.5644888877868652, "learning_rate": 7.970578107071566e-06, "loss": 0.4069, "step": 2372 }, { "epoch": 1.1035498372345374, "grad_norm": 0.391647070646286, "learning_rate": 7.968401071632854e-06, "loss": 0.3582, "step": 2373 }, { "epoch": 1.1040148814137343, "grad_norm": 0.6647116541862488, "learning_rate": 7.966223166797684e-06, "loss": 0.3614, "step": 2374 }, { "epoch": 1.1044799255929314, "grad_norm": 0.4719175696372986, "learning_rate": 7.964044393203928e-06, "loss": 0.3655, "step": 2375 }, { "epoch": 1.1049449697721283, "grad_norm": 0.43519163131713867, "learning_rate": 7.961864751489717e-06, "loss": 0.3422, "step": 2376 }, { "epoch": 1.1054100139513254, "grad_norm": 0.5003102421760559, "learning_rate": 7.959684242293428e-06, "loss": 0.326, "step": 2377 }, { "epoch": 1.1058750581305223, "grad_norm": 0.5152011513710022, "learning_rate": 7.957502866253699e-06, "loss": 0.313, "step": 2378 }, { "epoch": 1.1063401023097195, "grad_norm": 0.46767157316207886, "learning_rate": 7.955320624009421e-06, "loss": 0.374, "step": 2379 }, { "epoch": 1.1068051464889164, "grad_norm": 0.4796226918697357, "learning_rate": 7.953137516199737e-06, "loss": 0.394, "step": 2380 }, { "epoch": 1.1072701906681135, "grad_norm": 0.47181200981140137, "learning_rate": 7.950953543464039e-06, "loss": 0.3253, "step": 2381 }, { "epoch": 1.1077352348473104, "grad_norm": 0.48946288228034973, "learning_rate": 7.948768706441985e-06, "loss": 0.3536, "step": 2382 }, { "epoch": 1.1082002790265075, "grad_norm": 0.43955692648887634, "learning_rate": 7.946583005773471e-06, "loss": 0.3505, "step": 2383 }, { "epoch": 1.1086653232057047, "grad_norm": 0.5506858825683594, "learning_rate": 7.944396442098659e-06, "loss": 0.3846, "step": 2384 }, { "epoch": 1.1091303673849016, "grad_norm": 0.38817930221557617, "learning_rate": 7.942209016057954e-06, "loss": 0.3297, "step": 2385 }, { "epoch": 1.1095954115640987, "grad_norm": 0.4331190288066864, "learning_rate": 7.94002072829202e-06, "loss": 0.3632, "step": 2386 }, { "epoch": 1.1100604557432956, "grad_norm": 0.4732077419757843, "learning_rate": 7.937831579441768e-06, "loss": 0.3593, "step": 2387 }, { "epoch": 1.1105254999224927, "grad_norm": 0.3843960464000702, "learning_rate": 7.935641570148368e-06, "loss": 0.3042, "step": 2388 }, { "epoch": 1.1109905441016896, "grad_norm": 0.43816637992858887, "learning_rate": 7.933450701053235e-06, "loss": 0.3798, "step": 2389 }, { "epoch": 1.1114555882808868, "grad_norm": 0.5748831033706665, "learning_rate": 7.931258972798041e-06, "loss": 0.3534, "step": 2390 }, { "epoch": 1.1119206324600837, "grad_norm": 0.48593536019325256, "learning_rate": 7.929066386024707e-06, "loss": 0.3994, "step": 2391 }, { "epoch": 1.1123856766392808, "grad_norm": 0.5419626832008362, "learning_rate": 7.926872941375404e-06, "loss": 0.331, "step": 2392 }, { "epoch": 1.1128507208184777, "grad_norm": 0.5766515731811523, "learning_rate": 7.924678639492559e-06, "loss": 0.3805, "step": 2393 }, { "epoch": 1.1133157649976748, "grad_norm": 0.4355737268924713, "learning_rate": 7.922483481018848e-06, "loss": 0.3525, "step": 2394 }, { "epoch": 1.1137808091768717, "grad_norm": 0.5494896173477173, "learning_rate": 7.920287466597193e-06, "loss": 0.3321, "step": 2395 }, { "epoch": 1.1142458533560688, "grad_norm": 0.5438748598098755, "learning_rate": 7.918090596870776e-06, "loss": 0.3795, "step": 2396 }, { "epoch": 1.1147108975352658, "grad_norm": 0.4230845868587494, "learning_rate": 7.915892872483023e-06, "loss": 0.3506, "step": 2397 }, { "epoch": 1.1151759417144629, "grad_norm": 0.6087936162948608, "learning_rate": 7.913694294077607e-06, "loss": 0.3583, "step": 2398 }, { "epoch": 1.11564098589366, "grad_norm": 0.5080538988113403, "learning_rate": 7.911494862298464e-06, "loss": 0.3579, "step": 2399 }, { "epoch": 1.116106030072857, "grad_norm": 0.5257863402366638, "learning_rate": 7.909294577789765e-06, "loss": 0.3751, "step": 2400 }, { "epoch": 1.116571074252054, "grad_norm": 0.4901667833328247, "learning_rate": 7.90709344119594e-06, "loss": 0.3494, "step": 2401 }, { "epoch": 1.117036118431251, "grad_norm": 0.5573136210441589, "learning_rate": 7.90489145316167e-06, "loss": 0.3318, "step": 2402 }, { "epoch": 1.117501162610448, "grad_norm": 0.5585957765579224, "learning_rate": 7.902688614331875e-06, "loss": 0.3737, "step": 2403 }, { "epoch": 1.117966206789645, "grad_norm": 0.5125075578689575, "learning_rate": 7.900484925351734e-06, "loss": 0.4047, "step": 2404 }, { "epoch": 1.118431250968842, "grad_norm": 0.45521363615989685, "learning_rate": 7.898280386866673e-06, "loss": 0.321, "step": 2405 }, { "epoch": 1.118896295148039, "grad_norm": 0.5232580304145813, "learning_rate": 7.896074999522362e-06, "loss": 0.3717, "step": 2406 }, { "epoch": 1.1193613393272361, "grad_norm": 0.395935982465744, "learning_rate": 7.893868763964724e-06, "loss": 0.3292, "step": 2407 }, { "epoch": 1.119826383506433, "grad_norm": 0.5463009476661682, "learning_rate": 7.891661680839932e-06, "loss": 0.373, "step": 2408 }, { "epoch": 1.1202914276856302, "grad_norm": 0.44592776894569397, "learning_rate": 7.889453750794405e-06, "loss": 0.3653, "step": 2409 }, { "epoch": 1.120756471864827, "grad_norm": 0.45497268438339233, "learning_rate": 7.887244974474807e-06, "loss": 0.3252, "step": 2410 }, { "epoch": 1.1212215160440242, "grad_norm": 0.4712156653404236, "learning_rate": 7.885035352528054e-06, "loss": 0.3514, "step": 2411 }, { "epoch": 1.121686560223221, "grad_norm": 0.4327942430973053, "learning_rate": 7.882824885601308e-06, "loss": 0.3591, "step": 2412 }, { "epoch": 1.1221516044024182, "grad_norm": 0.3762139678001404, "learning_rate": 7.88061357434198e-06, "loss": 0.3263, "step": 2413 }, { "epoch": 1.1226166485816154, "grad_norm": 0.5221520662307739, "learning_rate": 7.878401419397725e-06, "loss": 0.3907, "step": 2414 }, { "epoch": 1.1230816927608123, "grad_norm": 0.41757580637931824, "learning_rate": 7.87618842141645e-06, "loss": 0.3556, "step": 2415 }, { "epoch": 1.1235467369400094, "grad_norm": 0.4390140771865845, "learning_rate": 7.873974581046303e-06, "loss": 0.3543, "step": 2416 }, { "epoch": 1.1240117811192063, "grad_norm": 0.40529686212539673, "learning_rate": 7.871759898935685e-06, "loss": 0.3468, "step": 2417 }, { "epoch": 1.1244768252984034, "grad_norm": 0.4217506945133209, "learning_rate": 7.86954437573324e-06, "loss": 0.3467, "step": 2418 }, { "epoch": 1.1249418694776003, "grad_norm": 0.418305367231369, "learning_rate": 7.867328012087856e-06, "loss": 0.3951, "step": 2419 }, { "epoch": 1.1254069136567975, "grad_norm": 0.42547518014907837, "learning_rate": 7.865110808648671e-06, "loss": 0.3369, "step": 2420 }, { "epoch": 1.1258719578359944, "grad_norm": 0.41389068961143494, "learning_rate": 7.862892766065072e-06, "loss": 0.3423, "step": 2421 }, { "epoch": 1.1263370020151915, "grad_norm": 0.47316908836364746, "learning_rate": 7.86067388498668e-06, "loss": 0.375, "step": 2422 }, { "epoch": 1.1268020461943884, "grad_norm": 0.4010957181453705, "learning_rate": 7.858454166063376e-06, "loss": 0.3348, "step": 2423 }, { "epoch": 1.1272670903735855, "grad_norm": 0.4541621804237366, "learning_rate": 7.856233609945276e-06, "loss": 0.3627, "step": 2424 }, { "epoch": 1.1277321345527824, "grad_norm": 0.4253348410129547, "learning_rate": 7.854012217282747e-06, "loss": 0.3391, "step": 2425 }, { "epoch": 1.1281971787319796, "grad_norm": 0.4224770963191986, "learning_rate": 7.851789988726397e-06, "loss": 0.3227, "step": 2426 }, { "epoch": 1.1286622229111765, "grad_norm": 0.4292997121810913, "learning_rate": 7.849566924927082e-06, "loss": 0.3815, "step": 2427 }, { "epoch": 1.1291272670903736, "grad_norm": 0.39106887578964233, "learning_rate": 7.8473430265359e-06, "loss": 0.3313, "step": 2428 }, { "epoch": 1.1295923112695707, "grad_norm": 0.41027796268463135, "learning_rate": 7.845118294204195e-06, "loss": 0.3478, "step": 2429 }, { "epoch": 1.1300573554487676, "grad_norm": 0.454773873090744, "learning_rate": 7.842892728583557e-06, "loss": 0.3566, "step": 2430 }, { "epoch": 1.1305223996279647, "grad_norm": 0.4108147919178009, "learning_rate": 7.840666330325815e-06, "loss": 0.338, "step": 2431 }, { "epoch": 1.1309874438071617, "grad_norm": 0.41285791993141174, "learning_rate": 7.838439100083048e-06, "loss": 0.372, "step": 2432 }, { "epoch": 1.1314524879863588, "grad_norm": 0.44633156061172485, "learning_rate": 7.836211038507571e-06, "loss": 0.341, "step": 2433 }, { "epoch": 1.1319175321655557, "grad_norm": 0.3967339098453522, "learning_rate": 7.833982146251952e-06, "loss": 0.3189, "step": 2434 }, { "epoch": 1.1323825763447528, "grad_norm": 0.4380015432834625, "learning_rate": 7.831752423968995e-06, "loss": 0.3506, "step": 2435 }, { "epoch": 1.1328476205239497, "grad_norm": 0.5487619638442993, "learning_rate": 7.829521872311747e-06, "loss": 0.3675, "step": 2436 }, { "epoch": 1.1333126647031468, "grad_norm": 0.4468625783920288, "learning_rate": 7.827290491933506e-06, "loss": 0.3792, "step": 2437 }, { "epoch": 1.1337777088823437, "grad_norm": 0.3934735059738159, "learning_rate": 7.825058283487803e-06, "loss": 0.3421, "step": 2438 }, { "epoch": 1.1342427530615409, "grad_norm": 0.49053192138671875, "learning_rate": 7.822825247628416e-06, "loss": 0.3482, "step": 2439 }, { "epoch": 1.1347077972407378, "grad_norm": 0.512285053730011, "learning_rate": 7.820591385009366e-06, "loss": 0.3616, "step": 2440 }, { "epoch": 1.135172841419935, "grad_norm": 0.49586769938468933, "learning_rate": 7.818356696284916e-06, "loss": 0.3796, "step": 2441 }, { "epoch": 1.1356378855991318, "grad_norm": 0.4258926808834076, "learning_rate": 7.816121182109567e-06, "loss": 0.332, "step": 2442 }, { "epoch": 1.136102929778329, "grad_norm": 0.45421749353408813, "learning_rate": 7.813884843138067e-06, "loss": 0.3504, "step": 2443 }, { "epoch": 1.136567973957526, "grad_norm": 0.45513275265693665, "learning_rate": 7.811647680025403e-06, "loss": 0.3345, "step": 2444 }, { "epoch": 1.137033018136723, "grad_norm": 0.4567432403564453, "learning_rate": 7.809409693426803e-06, "loss": 0.3494, "step": 2445 }, { "epoch": 1.13749806231592, "grad_norm": 0.4656786620616913, "learning_rate": 7.807170883997738e-06, "loss": 0.3428, "step": 2446 }, { "epoch": 1.137963106495117, "grad_norm": 0.44131651520729065, "learning_rate": 7.804931252393918e-06, "loss": 0.3475, "step": 2447 }, { "epoch": 1.1384281506743141, "grad_norm": 0.42925527691841125, "learning_rate": 7.802690799271295e-06, "loss": 0.3882, "step": 2448 }, { "epoch": 1.138893194853511, "grad_norm": 0.4417758584022522, "learning_rate": 7.800449525286062e-06, "loss": 0.3428, "step": 2449 }, { "epoch": 1.1393582390327082, "grad_norm": 0.4203170835971832, "learning_rate": 7.79820743109465e-06, "loss": 0.3425, "step": 2450 }, { "epoch": 1.139823283211905, "grad_norm": 0.45581138134002686, "learning_rate": 7.795964517353734e-06, "loss": 0.3596, "step": 2451 }, { "epoch": 1.1402883273911022, "grad_norm": 0.44257980585098267, "learning_rate": 7.793720784720227e-06, "loss": 0.3394, "step": 2452 }, { "epoch": 1.140753371570299, "grad_norm": 0.40145984292030334, "learning_rate": 7.791476233851281e-06, "loss": 0.3502, "step": 2453 }, { "epoch": 1.1412184157494962, "grad_norm": 0.48211121559143066, "learning_rate": 7.789230865404287e-06, "loss": 0.3394, "step": 2454 }, { "epoch": 1.1416834599286931, "grad_norm": 0.4565165340900421, "learning_rate": 7.78698468003688e-06, "loss": 0.3687, "step": 2455 }, { "epoch": 1.1421485041078903, "grad_norm": 0.4464455842971802, "learning_rate": 7.784737678406929e-06, "loss": 0.3255, "step": 2456 }, { "epoch": 1.1426135482870872, "grad_norm": 0.6133958101272583, "learning_rate": 7.782489861172545e-06, "loss": 0.3996, "step": 2457 }, { "epoch": 1.1430785924662843, "grad_norm": 0.3757973909378052, "learning_rate": 7.780241228992075e-06, "loss": 0.3129, "step": 2458 }, { "epoch": 1.1435436366454814, "grad_norm": 0.5780712366104126, "learning_rate": 7.777991782524112e-06, "loss": 0.3765, "step": 2459 }, { "epoch": 1.1440086808246783, "grad_norm": 0.5257070660591125, "learning_rate": 7.775741522427477e-06, "loss": 0.3453, "step": 2460 }, { "epoch": 1.1444737250038755, "grad_norm": 0.49410927295684814, "learning_rate": 7.773490449361238e-06, "loss": 0.3747, "step": 2461 }, { "epoch": 1.1449387691830724, "grad_norm": 0.46261581778526306, "learning_rate": 7.771238563984696e-06, "loss": 0.3363, "step": 2462 }, { "epoch": 1.1454038133622695, "grad_norm": 0.6092031598091125, "learning_rate": 7.768985866957392e-06, "loss": 0.3472, "step": 2463 }, { "epoch": 1.1458688575414664, "grad_norm": 0.42954081296920776, "learning_rate": 7.766732358939106e-06, "loss": 0.3336, "step": 2464 }, { "epoch": 1.1463339017206635, "grad_norm": 0.4083530604839325, "learning_rate": 7.764478040589854e-06, "loss": 0.3561, "step": 2465 }, { "epoch": 1.1467989458998604, "grad_norm": 0.46050816774368286, "learning_rate": 7.762222912569886e-06, "loss": 0.3711, "step": 2466 }, { "epoch": 1.1472639900790575, "grad_norm": 0.460467129945755, "learning_rate": 7.759966975539693e-06, "loss": 0.3107, "step": 2467 }, { "epoch": 1.1477290342582545, "grad_norm": 0.5141720771789551, "learning_rate": 7.757710230160003e-06, "loss": 0.3846, "step": 2468 }, { "epoch": 1.1481940784374516, "grad_norm": 0.45427727699279785, "learning_rate": 7.755452677091783e-06, "loss": 0.3422, "step": 2469 }, { "epoch": 1.1486591226166485, "grad_norm": 0.47798460721969604, "learning_rate": 7.75319431699623e-06, "loss": 0.3462, "step": 2470 }, { "epoch": 1.1491241667958456, "grad_norm": 0.4749840497970581, "learning_rate": 7.750935150534781e-06, "loss": 0.3547, "step": 2471 }, { "epoch": 1.1495892109750425, "grad_norm": 0.46182170510292053, "learning_rate": 7.748675178369112e-06, "loss": 0.395, "step": 2472 }, { "epoch": 1.1500542551542396, "grad_norm": 0.504657506942749, "learning_rate": 7.74641440116113e-06, "loss": 0.3707, "step": 2473 }, { "epoch": 1.1505192993334368, "grad_norm": 0.4313875436782837, "learning_rate": 7.74415281957298e-06, "loss": 0.3357, "step": 2474 }, { "epoch": 1.1509843435126337, "grad_norm": 0.5049738883972168, "learning_rate": 7.741890434267043e-06, "loss": 0.3556, "step": 2475 }, { "epoch": 1.1514493876918308, "grad_norm": 0.4582679569721222, "learning_rate": 7.739627245905935e-06, "loss": 0.3233, "step": 2476 }, { "epoch": 1.1519144318710277, "grad_norm": 0.463096559047699, "learning_rate": 7.737363255152506e-06, "loss": 0.3661, "step": 2477 }, { "epoch": 1.1523794760502248, "grad_norm": 0.4227137267589569, "learning_rate": 7.735098462669843e-06, "loss": 0.3238, "step": 2478 }, { "epoch": 1.1528445202294217, "grad_norm": 0.5194125771522522, "learning_rate": 7.732832869121267e-06, "loss": 0.3713, "step": 2479 }, { "epoch": 1.1533095644086189, "grad_norm": 0.5065972805023193, "learning_rate": 7.730566475170334e-06, "loss": 0.3743, "step": 2480 }, { "epoch": 1.1537746085878158, "grad_norm": 0.4301440715789795, "learning_rate": 7.728299281480833e-06, "loss": 0.3142, "step": 2481 }, { "epoch": 1.154239652767013, "grad_norm": 0.4768223464488983, "learning_rate": 7.726031288716789e-06, "loss": 0.3369, "step": 2482 }, { "epoch": 1.1547046969462098, "grad_norm": 0.5396889448165894, "learning_rate": 7.723762497542459e-06, "loss": 0.383, "step": 2483 }, { "epoch": 1.155169741125407, "grad_norm": 0.4935142993927002, "learning_rate": 7.72149290862234e-06, "loss": 0.3244, "step": 2484 }, { "epoch": 1.155634785304604, "grad_norm": 0.428690642118454, "learning_rate": 7.719222522621149e-06, "loss": 0.3658, "step": 2485 }, { "epoch": 1.156099829483801, "grad_norm": 0.4382781982421875, "learning_rate": 7.716951340203851e-06, "loss": 0.3524, "step": 2486 }, { "epoch": 1.1565648736629979, "grad_norm": 0.594252347946167, "learning_rate": 7.714679362035638e-06, "loss": 0.3808, "step": 2487 }, { "epoch": 1.157029917842195, "grad_norm": 0.4175407290458679, "learning_rate": 7.712406588781935e-06, "loss": 0.3164, "step": 2488 }, { "epoch": 1.1574949620213921, "grad_norm": 0.4126685559749603, "learning_rate": 7.7101330211084e-06, "loss": 0.3416, "step": 2489 }, { "epoch": 1.157960006200589, "grad_norm": 0.5235841274261475, "learning_rate": 7.707858659680924e-06, "loss": 0.3892, "step": 2490 }, { "epoch": 1.1584250503797862, "grad_norm": 0.3950689733028412, "learning_rate": 7.70558350516563e-06, "loss": 0.3272, "step": 2491 }, { "epoch": 1.158890094558983, "grad_norm": 0.4256301820278168, "learning_rate": 7.703307558228875e-06, "loss": 0.3423, "step": 2492 }, { "epoch": 1.1593551387381802, "grad_norm": 0.4690292775630951, "learning_rate": 7.701030819537248e-06, "loss": 0.333, "step": 2493 }, { "epoch": 1.159820182917377, "grad_norm": 0.4405955672264099, "learning_rate": 7.698753289757565e-06, "loss": 0.3981, "step": 2494 }, { "epoch": 1.1602852270965742, "grad_norm": 0.5139094591140747, "learning_rate": 7.69647496955688e-06, "loss": 0.35, "step": 2495 }, { "epoch": 1.1607502712757711, "grad_norm": 0.39744675159454346, "learning_rate": 7.694195859602475e-06, "loss": 0.3465, "step": 2496 }, { "epoch": 1.1612153154549683, "grad_norm": 0.4022337794303894, "learning_rate": 7.691915960561869e-06, "loss": 0.3574, "step": 2497 }, { "epoch": 1.1616803596341652, "grad_norm": 0.3752380907535553, "learning_rate": 7.6896352731028e-06, "loss": 0.3333, "step": 2498 }, { "epoch": 1.1621454038133623, "grad_norm": 0.48939090967178345, "learning_rate": 7.687353797893249e-06, "loss": 0.3831, "step": 2499 }, { "epoch": 1.1626104479925594, "grad_norm": 0.4494510591030121, "learning_rate": 7.68507153560142e-06, "loss": 0.3882, "step": 2500 }, { "epoch": 1.1630754921717563, "grad_norm": 0.38291284441947937, "learning_rate": 7.682788486895754e-06, "loss": 0.3589, "step": 2501 }, { "epoch": 1.1635405363509532, "grad_norm": 0.383709192276001, "learning_rate": 7.680504652444917e-06, "loss": 0.37, "step": 2502 }, { "epoch": 1.1640055805301504, "grad_norm": 0.507161557674408, "learning_rate": 7.678220032917806e-06, "loss": 0.3843, "step": 2503 }, { "epoch": 1.1644706247093475, "grad_norm": 0.3985297977924347, "learning_rate": 7.675934628983551e-06, "loss": 0.3142, "step": 2504 }, { "epoch": 1.1649356688885444, "grad_norm": 0.41284704208374023, "learning_rate": 7.67364844131151e-06, "loss": 0.3457, "step": 2505 }, { "epoch": 1.1654007130677415, "grad_norm": 0.49253928661346436, "learning_rate": 7.671361470571265e-06, "loss": 0.3974, "step": 2506 }, { "epoch": 1.1658657572469384, "grad_norm": 0.41742268204689026, "learning_rate": 7.669073717432641e-06, "loss": 0.346, "step": 2507 }, { "epoch": 1.1663308014261355, "grad_norm": 0.4668549597263336, "learning_rate": 7.666785182565676e-06, "loss": 0.3568, "step": 2508 }, { "epoch": 1.1667958456053324, "grad_norm": 0.43478819727897644, "learning_rate": 7.66449586664065e-06, "loss": 0.2967, "step": 2509 }, { "epoch": 1.1672608897845296, "grad_norm": 0.5247063636779785, "learning_rate": 7.662205770328064e-06, "loss": 0.4326, "step": 2510 }, { "epoch": 1.1677259339637265, "grad_norm": 0.3936578333377838, "learning_rate": 7.65991489429865e-06, "loss": 0.3177, "step": 2511 }, { "epoch": 1.1681909781429236, "grad_norm": 0.43789342045783997, "learning_rate": 7.65762323922337e-06, "loss": 0.3701, "step": 2512 }, { "epoch": 1.1686560223221205, "grad_norm": 0.46906614303588867, "learning_rate": 7.655330805773411e-06, "loss": 0.3368, "step": 2513 }, { "epoch": 1.1691210665013176, "grad_norm": 0.4493185877799988, "learning_rate": 7.653037594620189e-06, "loss": 0.3654, "step": 2514 }, { "epoch": 1.1695861106805148, "grad_norm": 0.40783825516700745, "learning_rate": 7.650743606435352e-06, "loss": 0.3143, "step": 2515 }, { "epoch": 1.1700511548597117, "grad_norm": 0.5146582722663879, "learning_rate": 7.648448841890765e-06, "loss": 0.3812, "step": 2516 }, { "epoch": 1.1705161990389086, "grad_norm": 0.4294111430644989, "learning_rate": 7.646153301658534e-06, "loss": 0.3192, "step": 2517 }, { "epoch": 1.1709812432181057, "grad_norm": 0.4016132354736328, "learning_rate": 7.643856986410983e-06, "loss": 0.3582, "step": 2518 }, { "epoch": 1.1714462873973028, "grad_norm": 0.4334622919559479, "learning_rate": 7.641559896820664e-06, "loss": 0.3611, "step": 2519 }, { "epoch": 1.1719113315764997, "grad_norm": 0.4894271492958069, "learning_rate": 7.63926203356036e-06, "loss": 0.3673, "step": 2520 }, { "epoch": 1.1723763757556969, "grad_norm": 0.45334476232528687, "learning_rate": 7.636963397303074e-06, "loss": 0.3622, "step": 2521 }, { "epoch": 1.1728414199348938, "grad_norm": 0.4029448926448822, "learning_rate": 7.634663988722044e-06, "loss": 0.3332, "step": 2522 }, { "epoch": 1.173306464114091, "grad_norm": 0.4322480261325836, "learning_rate": 7.632363808490726e-06, "loss": 0.3174, "step": 2523 }, { "epoch": 1.1737715082932878, "grad_norm": 0.4665205180644989, "learning_rate": 7.630062857282806e-06, "loss": 0.3799, "step": 2524 }, { "epoch": 1.174236552472485, "grad_norm": 0.438932329416275, "learning_rate": 7.627761135772196e-06, "loss": 0.3446, "step": 2525 }, { "epoch": 1.1747015966516818, "grad_norm": 0.40907543897628784, "learning_rate": 7.625458644633032e-06, "loss": 0.3606, "step": 2526 }, { "epoch": 1.175166640830879, "grad_norm": 0.39875200390815735, "learning_rate": 7.623155384539678e-06, "loss": 0.315, "step": 2527 }, { "epoch": 1.1756316850100759, "grad_norm": 0.39767640829086304, "learning_rate": 7.6208513561667184e-06, "loss": 0.3272, "step": 2528 }, { "epoch": 1.176096729189273, "grad_norm": 0.4325427711009979, "learning_rate": 7.618546560188968e-06, "loss": 0.3653, "step": 2529 }, { "epoch": 1.1765617733684701, "grad_norm": 0.43984878063201904, "learning_rate": 7.616240997281465e-06, "loss": 0.3544, "step": 2530 }, { "epoch": 1.177026817547667, "grad_norm": 0.42820581793785095, "learning_rate": 7.613934668119467e-06, "loss": 0.3906, "step": 2531 }, { "epoch": 1.177491861726864, "grad_norm": 0.44489437341690063, "learning_rate": 7.611627573378466e-06, "loss": 0.3505, "step": 2532 }, { "epoch": 1.177956905906061, "grad_norm": 0.4203685522079468, "learning_rate": 7.609319713734169e-06, "loss": 0.3437, "step": 2533 }, { "epoch": 1.1784219500852582, "grad_norm": 0.47429367899894714, "learning_rate": 7.607011089862512e-06, "loss": 0.3635, "step": 2534 }, { "epoch": 1.178886994264455, "grad_norm": 0.4002319276332855, "learning_rate": 7.604701702439652e-06, "loss": 0.3221, "step": 2535 }, { "epoch": 1.1793520384436522, "grad_norm": 0.45908692479133606, "learning_rate": 7.602391552141972e-06, "loss": 0.3561, "step": 2536 }, { "epoch": 1.1798170826228491, "grad_norm": 0.4550569951534271, "learning_rate": 7.600080639646077e-06, "loss": 0.3201, "step": 2537 }, { "epoch": 1.1802821268020462, "grad_norm": 0.4702633321285248, "learning_rate": 7.597768965628798e-06, "loss": 0.36, "step": 2538 }, { "epoch": 1.1807471709812432, "grad_norm": 0.5423824787139893, "learning_rate": 7.595456530767185e-06, "loss": 0.3632, "step": 2539 }, { "epoch": 1.1812122151604403, "grad_norm": 0.44125810265541077, "learning_rate": 7.593143335738511e-06, "loss": 0.3491, "step": 2540 }, { "epoch": 1.1816772593396372, "grad_norm": 0.4145257771015167, "learning_rate": 7.590829381220275e-06, "loss": 0.3175, "step": 2541 }, { "epoch": 1.1821423035188343, "grad_norm": 0.5215808153152466, "learning_rate": 7.5885146678901954e-06, "loss": 0.3891, "step": 2542 }, { "epoch": 1.1826073476980312, "grad_norm": 0.4379088878631592, "learning_rate": 7.586199196426216e-06, "loss": 0.3551, "step": 2543 }, { "epoch": 1.1830723918772283, "grad_norm": 0.42644643783569336, "learning_rate": 7.583882967506502e-06, "loss": 0.3282, "step": 2544 }, { "epoch": 1.1835374360564255, "grad_norm": 0.4398006200790405, "learning_rate": 7.581565981809435e-06, "loss": 0.3172, "step": 2545 }, { "epoch": 1.1840024802356224, "grad_norm": 0.46293768286705017, "learning_rate": 7.579248240013626e-06, "loss": 0.3876, "step": 2546 }, { "epoch": 1.1844675244148193, "grad_norm": 0.46180588006973267, "learning_rate": 7.576929742797902e-06, "loss": 0.3936, "step": 2547 }, { "epoch": 1.1849325685940164, "grad_norm": 0.4307175874710083, "learning_rate": 7.574610490841312e-06, "loss": 0.3386, "step": 2548 }, { "epoch": 1.1853976127732135, "grad_norm": 0.3494255542755127, "learning_rate": 7.5722904848231315e-06, "loss": 0.3124, "step": 2549 }, { "epoch": 1.1858626569524104, "grad_norm": 0.4179413914680481, "learning_rate": 7.5699697254228496e-06, "loss": 0.3271, "step": 2550 }, { "epoch": 1.1863277011316076, "grad_norm": 0.47171148657798767, "learning_rate": 7.56764821332018e-06, "loss": 0.3899, "step": 2551 }, { "epoch": 1.1867927453108045, "grad_norm": 0.4213588535785675, "learning_rate": 7.565325949195055e-06, "loss": 0.3373, "step": 2552 }, { "epoch": 1.1872577894900016, "grad_norm": 0.48638585209846497, "learning_rate": 7.563002933727628e-06, "loss": 0.3578, "step": 2553 }, { "epoch": 1.1877228336691985, "grad_norm": 0.4090135097503662, "learning_rate": 7.560679167598273e-06, "loss": 0.3496, "step": 2554 }, { "epoch": 1.1881878778483956, "grad_norm": 0.44248971343040466, "learning_rate": 7.558354651487583e-06, "loss": 0.3446, "step": 2555 }, { "epoch": 1.1886529220275925, "grad_norm": 0.40973594784736633, "learning_rate": 7.556029386076371e-06, "loss": 0.3623, "step": 2556 }, { "epoch": 1.1891179662067897, "grad_norm": 0.4441074728965759, "learning_rate": 7.553703372045671e-06, "loss": 0.3421, "step": 2557 }, { "epoch": 1.1895830103859866, "grad_norm": 0.48949727416038513, "learning_rate": 7.5513766100767334e-06, "loss": 0.3372, "step": 2558 }, { "epoch": 1.1900480545651837, "grad_norm": 0.4103635251522064, "learning_rate": 7.549049100851029e-06, "loss": 0.3492, "step": 2559 }, { "epoch": 1.1905130987443808, "grad_norm": 0.395436555147171, "learning_rate": 7.546720845050247e-06, "loss": 0.3475, "step": 2560 }, { "epoch": 1.1909781429235777, "grad_norm": 0.4113844335079193, "learning_rate": 7.544391843356298e-06, "loss": 0.3744, "step": 2561 }, { "epoch": 1.1914431871027749, "grad_norm": 0.397686243057251, "learning_rate": 7.542062096451306e-06, "loss": 0.333, "step": 2562 }, { "epoch": 1.1919082312819718, "grad_norm": 0.4004164934158325, "learning_rate": 7.539731605017616e-06, "loss": 0.3354, "step": 2563 }, { "epoch": 1.192373275461169, "grad_norm": 0.43825533986091614, "learning_rate": 7.537400369737793e-06, "loss": 0.3887, "step": 2564 }, { "epoch": 1.1928383196403658, "grad_norm": 0.433907151222229, "learning_rate": 7.535068391294618e-06, "loss": 0.3413, "step": 2565 }, { "epoch": 1.193303363819563, "grad_norm": 0.3985256254673004, "learning_rate": 7.532735670371088e-06, "loss": 0.3353, "step": 2566 }, { "epoch": 1.1937684079987598, "grad_norm": 0.4107392728328705, "learning_rate": 7.530402207650418e-06, "loss": 0.3753, "step": 2567 }, { "epoch": 1.194233452177957, "grad_norm": 0.40422889590263367, "learning_rate": 7.528068003816045e-06, "loss": 0.3373, "step": 2568 }, { "epoch": 1.1946984963571539, "grad_norm": 0.44744202494621277, "learning_rate": 7.525733059551618e-06, "loss": 0.3985, "step": 2569 }, { "epoch": 1.195163540536351, "grad_norm": 0.4296942353248596, "learning_rate": 7.523397375541003e-06, "loss": 0.3075, "step": 2570 }, { "epoch": 1.195628584715548, "grad_norm": 0.47275644540786743, "learning_rate": 7.521060952468284e-06, "loss": 0.3916, "step": 2571 }, { "epoch": 1.196093628894745, "grad_norm": 0.4506266117095947, "learning_rate": 7.518723791017762e-06, "loss": 0.3457, "step": 2572 }, { "epoch": 1.196558673073942, "grad_norm": 0.5241588354110718, "learning_rate": 7.516385891873954e-06, "loss": 0.398, "step": 2573 }, { "epoch": 1.197023717253139, "grad_norm": 0.38779518008232117, "learning_rate": 7.5140472557215945e-06, "loss": 0.3554, "step": 2574 }, { "epoch": 1.1974887614323362, "grad_norm": 0.418839693069458, "learning_rate": 7.511707883245627e-06, "loss": 0.3467, "step": 2575 }, { "epoch": 1.197953805611533, "grad_norm": 0.5039485692977905, "learning_rate": 7.50936777513122e-06, "loss": 0.4058, "step": 2576 }, { "epoch": 1.1984188497907302, "grad_norm": 0.4700038433074951, "learning_rate": 7.50702693206375e-06, "loss": 0.3266, "step": 2577 }, { "epoch": 1.1988838939699271, "grad_norm": 0.43291565775871277, "learning_rate": 7.5046853547288155e-06, "loss": 0.3645, "step": 2578 }, { "epoch": 1.1993489381491242, "grad_norm": 0.45220211148262024, "learning_rate": 7.502343043812224e-06, "loss": 0.3327, "step": 2579 }, { "epoch": 1.1998139823283211, "grad_norm": 0.4677691161632538, "learning_rate": 7.500000000000001e-06, "loss": 0.3708, "step": 2580 }, { "epoch": 1.2002790265075183, "grad_norm": 0.43373778462409973, "learning_rate": 7.497656223978385e-06, "loss": 0.339, "step": 2581 }, { "epoch": 1.2007440706867152, "grad_norm": 0.44764167070388794, "learning_rate": 7.495311716433833e-06, "loss": 0.3613, "step": 2582 }, { "epoch": 1.2012091148659123, "grad_norm": 0.4182588756084442, "learning_rate": 7.492966478053009e-06, "loss": 0.3325, "step": 2583 }, { "epoch": 1.2016741590451092, "grad_norm": 0.35634884238243103, "learning_rate": 7.490620509522797e-06, "loss": 0.3005, "step": 2584 }, { "epoch": 1.2021392032243063, "grad_norm": 0.48205626010894775, "learning_rate": 7.488273811530294e-06, "loss": 0.4052, "step": 2585 }, { "epoch": 1.2026042474035032, "grad_norm": 0.41472601890563965, "learning_rate": 7.48592638476281e-06, "loss": 0.3258, "step": 2586 }, { "epoch": 1.2030692915827004, "grad_norm": 0.4383096992969513, "learning_rate": 7.483578229907866e-06, "loss": 0.3553, "step": 2587 }, { "epoch": 1.2035343357618973, "grad_norm": 0.45658400654792786, "learning_rate": 7.481229347653201e-06, "loss": 0.3551, "step": 2588 }, { "epoch": 1.2039993799410944, "grad_norm": 0.4237629771232605, "learning_rate": 7.4788797386867596e-06, "loss": 0.3481, "step": 2589 }, { "epoch": 1.2044644241202915, "grad_norm": 0.4323580861091614, "learning_rate": 7.47652940369671e-06, "loss": 0.3566, "step": 2590 }, { "epoch": 1.2049294682994884, "grad_norm": 0.4833803176879883, "learning_rate": 7.474178343371425e-06, "loss": 0.3718, "step": 2591 }, { "epoch": 1.2053945124786856, "grad_norm": 0.4594928026199341, "learning_rate": 7.471826558399492e-06, "loss": 0.3428, "step": 2592 }, { "epoch": 1.2058595566578825, "grad_norm": 0.4109436571598053, "learning_rate": 7.469474049469709e-06, "loss": 0.3287, "step": 2593 }, { "epoch": 1.2063246008370796, "grad_norm": 0.43107515573501587, "learning_rate": 7.467120817271091e-06, "loss": 0.3694, "step": 2594 }, { "epoch": 1.2067896450162765, "grad_norm": 0.4271430969238281, "learning_rate": 7.464766862492856e-06, "loss": 0.3546, "step": 2595 }, { "epoch": 1.2072546891954736, "grad_norm": 0.4466875195503235, "learning_rate": 7.4624121858244455e-06, "loss": 0.357, "step": 2596 }, { "epoch": 1.2077197333746705, "grad_norm": 0.3861998915672302, "learning_rate": 7.460056787955502e-06, "loss": 0.3172, "step": 2597 }, { "epoch": 1.2081847775538677, "grad_norm": 0.4079117774963379, "learning_rate": 7.4577006695758845e-06, "loss": 0.3402, "step": 2598 }, { "epoch": 1.2086498217330646, "grad_norm": 0.521819531917572, "learning_rate": 7.455343831375662e-06, "loss": 0.4022, "step": 2599 }, { "epoch": 1.2091148659122617, "grad_norm": 0.45120519399642944, "learning_rate": 7.452986274045114e-06, "loss": 0.3237, "step": 2600 }, { "epoch": 1.2095799100914586, "grad_norm": 0.42835864424705505, "learning_rate": 7.45062799827473e-06, "loss": 0.3277, "step": 2601 }, { "epoch": 1.2100449542706557, "grad_norm": 0.5196753144264221, "learning_rate": 7.4482690047552105e-06, "loss": 0.3841, "step": 2602 }, { "epoch": 1.2105099984498526, "grad_norm": 0.616445004940033, "learning_rate": 7.445909294177469e-06, "loss": 0.4043, "step": 2603 }, { "epoch": 1.2109750426290498, "grad_norm": 0.35232779383659363, "learning_rate": 7.4435488672326235e-06, "loss": 0.293, "step": 2604 }, { "epoch": 1.2114400868082469, "grad_norm": 0.5146499276161194, "learning_rate": 7.441187724612007e-06, "loss": 0.3402, "step": 2605 }, { "epoch": 1.2119051309874438, "grad_norm": 0.5325912833213806, "learning_rate": 7.438825867007156e-06, "loss": 0.3614, "step": 2606 }, { "epoch": 1.212370175166641, "grad_norm": 0.4090646803379059, "learning_rate": 7.436463295109824e-06, "loss": 0.3651, "step": 2607 }, { "epoch": 1.2128352193458378, "grad_norm": 0.4489936828613281, "learning_rate": 7.434100009611969e-06, "loss": 0.3855, "step": 2608 }, { "epoch": 1.213300263525035, "grad_norm": 0.410040020942688, "learning_rate": 7.431736011205759e-06, "loss": 0.3056, "step": 2609 }, { "epoch": 1.2137653077042319, "grad_norm": 0.4164040982723236, "learning_rate": 7.42937130058357e-06, "loss": 0.3439, "step": 2610 }, { "epoch": 1.214230351883429, "grad_norm": 0.4618469774723053, "learning_rate": 7.427005878437989e-06, "loss": 0.3526, "step": 2611 }, { "epoch": 1.2146953960626259, "grad_norm": 0.4020163416862488, "learning_rate": 7.4246397454618054e-06, "loss": 0.3325, "step": 2612 }, { "epoch": 1.215160440241823, "grad_norm": 0.442120224237442, "learning_rate": 7.422272902348027e-06, "loss": 0.3265, "step": 2613 }, { "epoch": 1.21562548442102, "grad_norm": 0.527863085269928, "learning_rate": 7.41990534978986e-06, "loss": 0.3838, "step": 2614 }, { "epoch": 1.216090528600217, "grad_norm": 0.4838730990886688, "learning_rate": 7.417537088480722e-06, "loss": 0.3671, "step": 2615 }, { "epoch": 1.216555572779414, "grad_norm": 0.3663950264453888, "learning_rate": 7.41516811911424e-06, "loss": 0.3008, "step": 2616 }, { "epoch": 1.217020616958611, "grad_norm": 0.4481393098831177, "learning_rate": 7.412798442384246e-06, "loss": 0.375, "step": 2617 }, { "epoch": 1.217485661137808, "grad_norm": 0.500136137008667, "learning_rate": 7.4104280589847785e-06, "loss": 0.3493, "step": 2618 }, { "epoch": 1.2179507053170051, "grad_norm": 0.4056425988674164, "learning_rate": 7.408056969610087e-06, "loss": 0.3305, "step": 2619 }, { "epoch": 1.2184157494962022, "grad_norm": 0.3582363724708557, "learning_rate": 7.405685174954623e-06, "loss": 0.3193, "step": 2620 }, { "epoch": 1.2188807936753991, "grad_norm": 0.5270162224769592, "learning_rate": 7.403312675713047e-06, "loss": 0.3598, "step": 2621 }, { "epoch": 1.2193458378545963, "grad_norm": 0.5136641263961792, "learning_rate": 7.400939472580227e-06, "loss": 0.3283, "step": 2622 }, { "epoch": 1.2198108820337932, "grad_norm": 0.46304944157600403, "learning_rate": 7.398565566251232e-06, "loss": 0.3431, "step": 2623 }, { "epoch": 1.2202759262129903, "grad_norm": 0.45918190479278564, "learning_rate": 7.396190957421343e-06, "loss": 0.3276, "step": 2624 }, { "epoch": 1.2207409703921872, "grad_norm": 0.5070422291755676, "learning_rate": 7.393815646786047e-06, "loss": 0.3519, "step": 2625 }, { "epoch": 1.2212060145713843, "grad_norm": 0.45554548501968384, "learning_rate": 7.3914396350410285e-06, "loss": 0.3358, "step": 2626 }, { "epoch": 1.2216710587505812, "grad_norm": 0.43549612164497375, "learning_rate": 7.389062922882187e-06, "loss": 0.3329, "step": 2627 }, { "epoch": 1.2221361029297784, "grad_norm": 0.4475855231285095, "learning_rate": 7.3866855110056205e-06, "loss": 0.3651, "step": 2628 }, { "epoch": 1.2226011471089753, "grad_norm": 0.47854486107826233, "learning_rate": 7.384307400107635e-06, "loss": 0.3577, "step": 2629 }, { "epoch": 1.2230661912881724, "grad_norm": 0.4717871844768524, "learning_rate": 7.381928590884741e-06, "loss": 0.4082, "step": 2630 }, { "epoch": 1.2235312354673693, "grad_norm": 0.42780524492263794, "learning_rate": 7.379549084033653e-06, "loss": 0.3446, "step": 2631 }, { "epoch": 1.2239962796465664, "grad_norm": 0.41821184754371643, "learning_rate": 7.37716888025129e-06, "loss": 0.3719, "step": 2632 }, { "epoch": 1.2244613238257633, "grad_norm": 0.43905121088027954, "learning_rate": 7.374787980234775e-06, "loss": 0.3395, "step": 2633 }, { "epoch": 1.2249263680049605, "grad_norm": 0.44092613458633423, "learning_rate": 7.372406384681433e-06, "loss": 0.3066, "step": 2634 }, { "epoch": 1.2253914121841576, "grad_norm": 0.5179281234741211, "learning_rate": 7.370024094288797e-06, "loss": 0.3801, "step": 2635 }, { "epoch": 1.2258564563633545, "grad_norm": 0.4214264452457428, "learning_rate": 7.3676411097546e-06, "loss": 0.3758, "step": 2636 }, { "epoch": 1.2263215005425516, "grad_norm": 0.43435075879096985, "learning_rate": 7.36525743177678e-06, "loss": 0.3487, "step": 2637 }, { "epoch": 1.2267865447217485, "grad_norm": 0.4535754323005676, "learning_rate": 7.362873061053479e-06, "loss": 0.3527, "step": 2638 }, { "epoch": 1.2272515889009457, "grad_norm": 0.4143884778022766, "learning_rate": 7.360487998283038e-06, "loss": 0.3025, "step": 2639 }, { "epoch": 1.2277166330801426, "grad_norm": 0.4507713317871094, "learning_rate": 7.358102244164003e-06, "loss": 0.3842, "step": 2640 }, { "epoch": 1.2281816772593397, "grad_norm": 0.4342232942581177, "learning_rate": 7.355715799395126e-06, "loss": 0.37, "step": 2641 }, { "epoch": 1.2286467214385366, "grad_norm": 0.38228335976600647, "learning_rate": 7.353328664675353e-06, "loss": 0.3279, "step": 2642 }, { "epoch": 1.2291117656177337, "grad_norm": 0.42254742980003357, "learning_rate": 7.350940840703842e-06, "loss": 0.3522, "step": 2643 }, { "epoch": 1.2295768097969306, "grad_norm": 0.48193877935409546, "learning_rate": 7.348552328179945e-06, "loss": 0.3823, "step": 2644 }, { "epoch": 1.2300418539761278, "grad_norm": 0.4392937123775482, "learning_rate": 7.3461631278032175e-06, "loss": 0.3688, "step": 2645 }, { "epoch": 1.2305068981553249, "grad_norm": 0.545441210269928, "learning_rate": 7.34377324027342e-06, "loss": 0.3537, "step": 2646 }, { "epoch": 1.2309719423345218, "grad_norm": 0.43187499046325684, "learning_rate": 7.3413826662905104e-06, "loss": 0.314, "step": 2647 }, { "epoch": 1.2314369865137187, "grad_norm": 0.4856451451778412, "learning_rate": 7.33899140655465e-06, "loss": 0.386, "step": 2648 }, { "epoch": 1.2319020306929158, "grad_norm": 0.4500555396080017, "learning_rate": 7.336599461766199e-06, "loss": 0.33, "step": 2649 }, { "epoch": 1.232367074872113, "grad_norm": 0.41082900762557983, "learning_rate": 7.334206832625719e-06, "loss": 0.3631, "step": 2650 }, { "epoch": 1.2328321190513098, "grad_norm": 0.43610817193984985, "learning_rate": 7.331813519833972e-06, "loss": 0.3594, "step": 2651 }, { "epoch": 1.233297163230507, "grad_norm": 0.3768427073955536, "learning_rate": 7.329419524091923e-06, "loss": 0.3397, "step": 2652 }, { "epoch": 1.2337622074097039, "grad_norm": 0.4704180657863617, "learning_rate": 7.32702484610073e-06, "loss": 0.339, "step": 2653 }, { "epoch": 1.234227251588901, "grad_norm": 0.5134731531143188, "learning_rate": 7.32462948656176e-06, "loss": 0.389, "step": 2654 }, { "epoch": 1.234692295768098, "grad_norm": 0.454752117395401, "learning_rate": 7.322233446176571e-06, "loss": 0.3489, "step": 2655 }, { "epoch": 1.235157339947295, "grad_norm": 0.4670364260673523, "learning_rate": 7.319836725646927e-06, "loss": 0.3759, "step": 2656 }, { "epoch": 1.235622384126492, "grad_norm": 0.423016220331192, "learning_rate": 7.317439325674788e-06, "loss": 0.3212, "step": 2657 }, { "epoch": 1.236087428305689, "grad_norm": 0.5560788512229919, "learning_rate": 7.315041246962313e-06, "loss": 0.3528, "step": 2658 }, { "epoch": 1.236552472484886, "grad_norm": 0.3992140591144562, "learning_rate": 7.312642490211857e-06, "loss": 0.3611, "step": 2659 }, { "epoch": 1.237017516664083, "grad_norm": 0.49364173412323, "learning_rate": 7.310243056125984e-06, "loss": 0.3515, "step": 2660 }, { "epoch": 1.2374825608432802, "grad_norm": 0.5286840796470642, "learning_rate": 7.307842945407444e-06, "loss": 0.3108, "step": 2661 }, { "epoch": 1.2379476050224771, "grad_norm": 0.4787932336330414, "learning_rate": 7.3054421587591925e-06, "loss": 0.3592, "step": 2662 }, { "epoch": 1.238412649201674, "grad_norm": 0.4455261528491974, "learning_rate": 7.30304069688438e-06, "loss": 0.3157, "step": 2663 }, { "epoch": 1.2388776933808712, "grad_norm": 0.4244392216205597, "learning_rate": 7.300638560486357e-06, "loss": 0.3612, "step": 2664 }, { "epoch": 1.2393427375600683, "grad_norm": 0.4475732147693634, "learning_rate": 7.298235750268667e-06, "loss": 0.3858, "step": 2665 }, { "epoch": 1.2398077817392652, "grad_norm": 0.4742658734321594, "learning_rate": 7.295832266935059e-06, "loss": 0.3213, "step": 2666 }, { "epoch": 1.2402728259184623, "grad_norm": 0.4781046509742737, "learning_rate": 7.29342811118947e-06, "loss": 0.3711, "step": 2667 }, { "epoch": 1.2407378700976592, "grad_norm": 0.4173920750617981, "learning_rate": 7.29102328373604e-06, "loss": 0.3644, "step": 2668 }, { "epoch": 1.2412029142768564, "grad_norm": 0.4399764835834503, "learning_rate": 7.288617785279104e-06, "loss": 0.3529, "step": 2669 }, { "epoch": 1.2416679584560533, "grad_norm": 0.41541317105293274, "learning_rate": 7.286211616523193e-06, "loss": 0.3791, "step": 2670 }, { "epoch": 1.2421330026352504, "grad_norm": 0.4661194682121277, "learning_rate": 7.283804778173035e-06, "loss": 0.3629, "step": 2671 }, { "epoch": 1.2425980468144473, "grad_norm": 0.45775488018989563, "learning_rate": 7.281397270933553e-06, "loss": 0.3493, "step": 2672 }, { "epoch": 1.2430630909936444, "grad_norm": 0.45113879442214966, "learning_rate": 7.278989095509867e-06, "loss": 0.3641, "step": 2673 }, { "epoch": 1.2435281351728413, "grad_norm": 0.4759120047092438, "learning_rate": 7.2765802526072945e-06, "loss": 0.3871, "step": 2674 }, { "epoch": 1.2439931793520385, "grad_norm": 0.35728970170021057, "learning_rate": 7.274170742931345e-06, "loss": 0.3399, "step": 2675 }, { "epoch": 1.2444582235312356, "grad_norm": 0.41665512323379517, "learning_rate": 7.271760567187723e-06, "loss": 0.3501, "step": 2676 }, { "epoch": 1.2449232677104325, "grad_norm": 0.5279736518859863, "learning_rate": 7.269349726082332e-06, "loss": 0.3652, "step": 2677 }, { "epoch": 1.2453883118896294, "grad_norm": 0.3690641522407532, "learning_rate": 7.266938220321268e-06, "loss": 0.3099, "step": 2678 }, { "epoch": 1.2458533560688265, "grad_norm": 0.49263355135917664, "learning_rate": 7.264526050610821e-06, "loss": 0.3927, "step": 2679 }, { "epoch": 1.2463184002480237, "grad_norm": 0.3883993327617645, "learning_rate": 7.2621132176574774e-06, "loss": 0.3367, "step": 2680 }, { "epoch": 1.2467834444272206, "grad_norm": 0.4446150064468384, "learning_rate": 7.2596997221679156e-06, "loss": 0.3588, "step": 2681 }, { "epoch": 1.2472484886064177, "grad_norm": 0.5633254051208496, "learning_rate": 7.2572855648490105e-06, "loss": 0.3633, "step": 2682 }, { "epoch": 1.2477135327856146, "grad_norm": 0.4423445165157318, "learning_rate": 7.25487074640783e-06, "loss": 0.3665, "step": 2683 }, { "epoch": 1.2481785769648117, "grad_norm": 0.48402294516563416, "learning_rate": 7.252455267551631e-06, "loss": 0.3508, "step": 2684 }, { "epoch": 1.2486436211440086, "grad_norm": 0.53091961145401, "learning_rate": 7.250039128987874e-06, "loss": 0.3603, "step": 2685 }, { "epoch": 1.2491086653232057, "grad_norm": 0.47006434202194214, "learning_rate": 7.247622331424202e-06, "loss": 0.3463, "step": 2686 }, { "epoch": 1.2495737095024027, "grad_norm": 0.5498688817024231, "learning_rate": 7.245204875568459e-06, "loss": 0.3642, "step": 2687 }, { "epoch": 1.2500387536815998, "grad_norm": 0.4160943627357483, "learning_rate": 7.242786762128675e-06, "loss": 0.3097, "step": 2688 }, { "epoch": 1.2505037978607967, "grad_norm": 0.38064897060394287, "learning_rate": 7.240367991813079e-06, "loss": 0.3235, "step": 2689 }, { "epoch": 1.2509688420399938, "grad_norm": 0.3919866383075714, "learning_rate": 7.237948565330091e-06, "loss": 0.3477, "step": 2690 }, { "epoch": 1.251433886219191, "grad_norm": 0.4545346200466156, "learning_rate": 7.235528483388318e-06, "loss": 0.3706, "step": 2691 }, { "epoch": 1.2518989303983878, "grad_norm": 0.43774187564849854, "learning_rate": 7.233107746696564e-06, "loss": 0.3314, "step": 2692 }, { "epoch": 1.2523639745775847, "grad_norm": 0.44677144289016724, "learning_rate": 7.230686355963824e-06, "loss": 0.3369, "step": 2693 }, { "epoch": 1.2528290187567819, "grad_norm": 0.4839126467704773, "learning_rate": 7.228264311899284e-06, "loss": 0.3949, "step": 2694 }, { "epoch": 1.253294062935979, "grad_norm": 0.5056577920913696, "learning_rate": 7.22584161521232e-06, "loss": 0.3997, "step": 2695 }, { "epoch": 1.253759107115176, "grad_norm": 0.35565152764320374, "learning_rate": 7.223418266612503e-06, "loss": 0.3188, "step": 2696 }, { "epoch": 1.254224151294373, "grad_norm": 0.3856726884841919, "learning_rate": 7.220994266809591e-06, "loss": 0.3519, "step": 2697 }, { "epoch": 1.25468919547357, "grad_norm": 0.46455973386764526, "learning_rate": 7.218569616513533e-06, "loss": 0.357, "step": 2698 }, { "epoch": 1.255154239652767, "grad_norm": 0.37406978011131287, "learning_rate": 7.2161443164344725e-06, "loss": 0.3416, "step": 2699 }, { "epoch": 1.255619283831964, "grad_norm": 0.43167954683303833, "learning_rate": 7.213718367282737e-06, "loss": 0.3567, "step": 2700 }, { "epoch": 1.256084328011161, "grad_norm": 0.41151732206344604, "learning_rate": 7.21129176976885e-06, "loss": 0.3307, "step": 2701 }, { "epoch": 1.256549372190358, "grad_norm": 0.4612469971179962, "learning_rate": 7.208864524603523e-06, "loss": 0.3684, "step": 2702 }, { "epoch": 1.2570144163695551, "grad_norm": 0.4303331673145294, "learning_rate": 7.206436632497656e-06, "loss": 0.3358, "step": 2703 }, { "epoch": 1.257479460548752, "grad_norm": 0.4344772696495056, "learning_rate": 7.204008094162338e-06, "loss": 0.3417, "step": 2704 }, { "epoch": 1.2579445047279492, "grad_norm": 0.4836406409740448, "learning_rate": 7.201578910308848e-06, "loss": 0.3793, "step": 2705 }, { "epoch": 1.2584095489071463, "grad_norm": 0.41068270802497864, "learning_rate": 7.199149081648658e-06, "loss": 0.3643, "step": 2706 }, { "epoch": 1.2588745930863432, "grad_norm": 0.4286365211009979, "learning_rate": 7.196718608893421e-06, "loss": 0.3287, "step": 2707 }, { "epoch": 1.25933963726554, "grad_norm": 0.4405948519706726, "learning_rate": 7.1942874927549865e-06, "loss": 0.3697, "step": 2708 }, { "epoch": 1.2598046814447372, "grad_norm": 0.36979782581329346, "learning_rate": 7.191855733945388e-06, "loss": 0.3286, "step": 2709 }, { "epoch": 1.2602697256239344, "grad_norm": 0.40628305077552795, "learning_rate": 7.189423333176847e-06, "loss": 0.3815, "step": 2710 }, { "epoch": 1.2607347698031313, "grad_norm": 0.42872005701065063, "learning_rate": 7.186990291161775e-06, "loss": 0.3473, "step": 2711 }, { "epoch": 1.2611998139823284, "grad_norm": 0.4314822852611542, "learning_rate": 7.184556608612769e-06, "loss": 0.3791, "step": 2712 }, { "epoch": 1.2616648581615253, "grad_norm": 0.42616841197013855, "learning_rate": 7.182122286242617e-06, "loss": 0.3513, "step": 2713 }, { "epoch": 1.2621299023407224, "grad_norm": 0.4406971037387848, "learning_rate": 7.1796873247642925e-06, "loss": 0.3609, "step": 2714 }, { "epoch": 1.2625949465199193, "grad_norm": 0.38528046011924744, "learning_rate": 7.177251724890957e-06, "loss": 0.3372, "step": 2715 }, { "epoch": 1.2630599906991165, "grad_norm": 0.4626089334487915, "learning_rate": 7.1748154873359556e-06, "loss": 0.3702, "step": 2716 }, { "epoch": 1.2635250348783134, "grad_norm": 0.3866967260837555, "learning_rate": 7.172378612812824e-06, "loss": 0.3385, "step": 2717 }, { "epoch": 1.2639900790575105, "grad_norm": 0.43559587001800537, "learning_rate": 7.169941102035281e-06, "loss": 0.3711, "step": 2718 }, { "epoch": 1.2644551232367074, "grad_norm": 0.4723300635814667, "learning_rate": 7.167502955717238e-06, "loss": 0.3782, "step": 2719 }, { "epoch": 1.2649201674159045, "grad_norm": 0.4137257933616638, "learning_rate": 7.165064174572787e-06, "loss": 0.3603, "step": 2720 }, { "epoch": 1.2653852115951016, "grad_norm": 0.5321688652038574, "learning_rate": 7.162624759316205e-06, "loss": 0.3839, "step": 2721 }, { "epoch": 1.2658502557742985, "grad_norm": 0.47028544545173645, "learning_rate": 7.160184710661959e-06, "loss": 0.3513, "step": 2722 }, { "epoch": 1.2663152999534955, "grad_norm": 0.5355919599533081, "learning_rate": 7.1577440293247e-06, "loss": 0.3701, "step": 2723 }, { "epoch": 1.2667803441326926, "grad_norm": 0.4389120936393738, "learning_rate": 7.155302716019263e-06, "loss": 0.3273, "step": 2724 }, { "epoch": 1.2672453883118897, "grad_norm": 0.4850804805755615, "learning_rate": 7.1528607714606706e-06, "loss": 0.3639, "step": 2725 }, { "epoch": 1.2677104324910866, "grad_norm": 0.4661966562271118, "learning_rate": 7.1504181963641265e-06, "loss": 0.3097, "step": 2726 }, { "epoch": 1.2681754766702837, "grad_norm": 0.4354253113269806, "learning_rate": 7.147974991445021e-06, "loss": 0.3555, "step": 2727 }, { "epoch": 1.2686405208494806, "grad_norm": 0.46909964084625244, "learning_rate": 7.145531157418932e-06, "loss": 0.3747, "step": 2728 }, { "epoch": 1.2691055650286778, "grad_norm": 0.5229499340057373, "learning_rate": 7.143086695001616e-06, "loss": 0.3623, "step": 2729 }, { "epoch": 1.2695706092078747, "grad_norm": 0.42865434288978577, "learning_rate": 7.14064160490902e-06, "loss": 0.3392, "step": 2730 }, { "epoch": 1.2700356533870718, "grad_norm": 0.46530023217201233, "learning_rate": 7.1381958878572665e-06, "loss": 0.3611, "step": 2731 }, { "epoch": 1.270500697566269, "grad_norm": 0.5126709938049316, "learning_rate": 7.135749544562669e-06, "loss": 0.3519, "step": 2732 }, { "epoch": 1.2709657417454658, "grad_norm": 0.4154670536518097, "learning_rate": 7.133302575741722e-06, "loss": 0.3326, "step": 2733 }, { "epoch": 1.2714307859246627, "grad_norm": 0.4609792232513428, "learning_rate": 7.130854982111102e-06, "loss": 0.375, "step": 2734 }, { "epoch": 1.2718958301038599, "grad_norm": 0.4117323160171509, "learning_rate": 7.128406764387667e-06, "loss": 0.3381, "step": 2735 }, { "epoch": 1.272360874283057, "grad_norm": 0.47705698013305664, "learning_rate": 7.1259579232884655e-06, "loss": 0.3487, "step": 2736 }, { "epoch": 1.272825918462254, "grad_norm": 0.464181512594223, "learning_rate": 7.12350845953072e-06, "loss": 0.333, "step": 2737 }, { "epoch": 1.2732909626414508, "grad_norm": 0.41154077649116516, "learning_rate": 7.12105837383184e-06, "loss": 0.3457, "step": 2738 }, { "epoch": 1.273756006820648, "grad_norm": 0.4273618459701538, "learning_rate": 7.118607666909413e-06, "loss": 0.338, "step": 2739 }, { "epoch": 1.274221050999845, "grad_norm": 0.4530821740627289, "learning_rate": 7.116156339481215e-06, "loss": 0.3423, "step": 2740 }, { "epoch": 1.274686095179042, "grad_norm": 0.434441477060318, "learning_rate": 7.113704392265196e-06, "loss": 0.3672, "step": 2741 }, { "epoch": 1.275151139358239, "grad_norm": 0.44967541098594666, "learning_rate": 7.1112518259794946e-06, "loss": 0.3257, "step": 2742 }, { "epoch": 1.275616183537436, "grad_norm": 0.44372931122779846, "learning_rate": 7.108798641342428e-06, "loss": 0.313, "step": 2743 }, { "epoch": 1.2760812277166331, "grad_norm": 0.4219914674758911, "learning_rate": 7.106344839072492e-06, "loss": 0.361, "step": 2744 }, { "epoch": 1.27654627189583, "grad_norm": 0.4370742738246918, "learning_rate": 7.103890419888367e-06, "loss": 0.3304, "step": 2745 }, { "epoch": 1.2770113160750272, "grad_norm": 0.41166791319847107, "learning_rate": 7.1014353845089126e-06, "loss": 0.3618, "step": 2746 }, { "epoch": 1.2774763602542243, "grad_norm": 0.3665507137775421, "learning_rate": 7.098979733653165e-06, "loss": 0.3125, "step": 2747 }, { "epoch": 1.2779414044334212, "grad_norm": 0.4135091006755829, "learning_rate": 7.096523468040349e-06, "loss": 0.3494, "step": 2748 }, { "epoch": 1.278406448612618, "grad_norm": 0.5211425423622131, "learning_rate": 7.094066588389864e-06, "loss": 0.3392, "step": 2749 }, { "epoch": 1.2788714927918152, "grad_norm": 0.4214104413986206, "learning_rate": 7.09160909542129e-06, "loss": 0.3606, "step": 2750 }, { "epoch": 1.2793365369710124, "grad_norm": 0.4723619818687439, "learning_rate": 7.089150989854385e-06, "loss": 0.3315, "step": 2751 }, { "epoch": 1.2798015811502093, "grad_norm": 0.5669287443161011, "learning_rate": 7.08669227240909e-06, "loss": 0.3577, "step": 2752 }, { "epoch": 1.2802666253294062, "grad_norm": 0.43773379921913147, "learning_rate": 7.084232943805522e-06, "loss": 0.3266, "step": 2753 }, { "epoch": 1.2807316695086033, "grad_norm": 0.4219588041305542, "learning_rate": 7.081773004763981e-06, "loss": 0.3465, "step": 2754 }, { "epoch": 1.2811967136878004, "grad_norm": 0.45649266242980957, "learning_rate": 7.079312456004941e-06, "loss": 0.337, "step": 2755 }, { "epoch": 1.2816617578669973, "grad_norm": 0.49470004439353943, "learning_rate": 7.076851298249057e-06, "loss": 0.3538, "step": 2756 }, { "epoch": 1.2821268020461944, "grad_norm": 0.4066573679447174, "learning_rate": 7.074389532217163e-06, "loss": 0.3736, "step": 2757 }, { "epoch": 1.2825918462253914, "grad_norm": 0.41550639271736145, "learning_rate": 7.0719271586302675e-06, "loss": 0.3284, "step": 2758 }, { "epoch": 1.2830568904045885, "grad_norm": 0.45798173546791077, "learning_rate": 7.069464178209563e-06, "loss": 0.3366, "step": 2759 }, { "epoch": 1.2835219345837854, "grad_norm": 0.44275960326194763, "learning_rate": 7.067000591676416e-06, "loss": 0.3668, "step": 2760 }, { "epoch": 1.2839869787629825, "grad_norm": 0.40415266156196594, "learning_rate": 7.06453639975237e-06, "loss": 0.3625, "step": 2761 }, { "epoch": 1.2844520229421796, "grad_norm": 0.4864114224910736, "learning_rate": 7.062071603159147e-06, "loss": 0.3692, "step": 2762 }, { "epoch": 1.2849170671213765, "grad_norm": 0.4446645975112915, "learning_rate": 7.059606202618646e-06, "loss": 0.3724, "step": 2763 }, { "epoch": 1.2853821113005734, "grad_norm": 0.406984806060791, "learning_rate": 7.057140198852942e-06, "loss": 0.3384, "step": 2764 }, { "epoch": 1.2858471554797706, "grad_norm": 0.44974416494369507, "learning_rate": 7.054673592584289e-06, "loss": 0.3329, "step": 2765 }, { "epoch": 1.2863121996589677, "grad_norm": 0.4763135313987732, "learning_rate": 7.052206384535115e-06, "loss": 0.3411, "step": 2766 }, { "epoch": 1.2867772438381646, "grad_norm": 0.44069865345954895, "learning_rate": 7.0497385754280255e-06, "loss": 0.351, "step": 2767 }, { "epoch": 1.2872422880173615, "grad_norm": 0.40506044030189514, "learning_rate": 7.047270165985801e-06, "loss": 0.3856, "step": 2768 }, { "epoch": 1.2877073321965586, "grad_norm": 0.4121856689453125, "learning_rate": 7.0448011569314e-06, "loss": 0.3631, "step": 2769 }, { "epoch": 1.2881723763757558, "grad_norm": 0.4210520088672638, "learning_rate": 7.042331548987953e-06, "loss": 0.3079, "step": 2770 }, { "epoch": 1.2886374205549527, "grad_norm": 0.46315163373947144, "learning_rate": 7.039861342878769e-06, "loss": 0.3727, "step": 2771 }, { "epoch": 1.2891024647341498, "grad_norm": 0.43790408968925476, "learning_rate": 7.037390539327332e-06, "loss": 0.3682, "step": 2772 }, { "epoch": 1.2895675089133467, "grad_norm": 0.4229748845100403, "learning_rate": 7.0349191390573e-06, "loss": 0.3808, "step": 2773 }, { "epoch": 1.2900325530925438, "grad_norm": 0.44016340374946594, "learning_rate": 7.032447142792506e-06, "loss": 0.3573, "step": 2774 }, { "epoch": 1.2904975972717407, "grad_norm": 0.4395344853401184, "learning_rate": 7.029974551256957e-06, "loss": 0.3609, "step": 2775 }, { "epoch": 1.2909626414509379, "grad_norm": 0.4566013514995575, "learning_rate": 7.027501365174833e-06, "loss": 0.3355, "step": 2776 }, { "epoch": 1.291427685630135, "grad_norm": 0.44307535886764526, "learning_rate": 7.025027585270495e-06, "loss": 0.3421, "step": 2777 }, { "epoch": 1.291892729809332, "grad_norm": 0.4520743489265442, "learning_rate": 7.022553212268469e-06, "loss": 0.3466, "step": 2778 }, { "epoch": 1.2923577739885288, "grad_norm": 0.4748930335044861, "learning_rate": 7.0200782468934605e-06, "loss": 0.359, "step": 2779 }, { "epoch": 1.292822818167726, "grad_norm": 0.4355354905128479, "learning_rate": 7.017602689870345e-06, "loss": 0.3346, "step": 2780 }, { "epoch": 1.293287862346923, "grad_norm": 0.4658071994781494, "learning_rate": 7.015126541924174e-06, "loss": 0.3986, "step": 2781 }, { "epoch": 1.29375290652612, "grad_norm": 0.45585206151008606, "learning_rate": 7.012649803780171e-06, "loss": 0.3595, "step": 2782 }, { "epoch": 1.294217950705317, "grad_norm": 0.4219159781932831, "learning_rate": 7.010172476163732e-06, "loss": 0.3379, "step": 2783 }, { "epoch": 1.294682994884514, "grad_norm": 0.40543264150619507, "learning_rate": 7.007694559800427e-06, "loss": 0.3518, "step": 2784 }, { "epoch": 1.2951480390637111, "grad_norm": 0.4673357605934143, "learning_rate": 7.005216055415996e-06, "loss": 0.3548, "step": 2785 }, { "epoch": 1.295613083242908, "grad_norm": 0.43243858218193054, "learning_rate": 7.002736963736354e-06, "loss": 0.3667, "step": 2786 }, { "epoch": 1.2960781274221052, "grad_norm": 0.4363313913345337, "learning_rate": 7.000257285487586e-06, "loss": 0.3485, "step": 2787 }, { "epoch": 1.296543171601302, "grad_norm": 0.44257044792175293, "learning_rate": 6.997777021395949e-06, "loss": 0.3625, "step": 2788 }, { "epoch": 1.2970082157804992, "grad_norm": 0.448543906211853, "learning_rate": 6.9952961721878734e-06, "loss": 0.373, "step": 2789 }, { "epoch": 1.297473259959696, "grad_norm": 0.41273754835128784, "learning_rate": 6.992814738589958e-06, "loss": 0.3761, "step": 2790 }, { "epoch": 1.2979383041388932, "grad_norm": 0.540025532245636, "learning_rate": 6.990332721328978e-06, "loss": 0.3636, "step": 2791 }, { "epoch": 1.2984033483180903, "grad_norm": 0.42229142785072327, "learning_rate": 6.987850121131871e-06, "loss": 0.3319, "step": 2792 }, { "epoch": 1.2988683924972872, "grad_norm": 0.4574054777622223, "learning_rate": 6.985366938725751e-06, "loss": 0.3782, "step": 2793 }, { "epoch": 1.2993334366764842, "grad_norm": 0.4920805096626282, "learning_rate": 6.982883174837904e-06, "loss": 0.376, "step": 2794 }, { "epoch": 1.2997984808556813, "grad_norm": 0.40307578444480896, "learning_rate": 6.980398830195785e-06, "loss": 0.3285, "step": 2795 }, { "epoch": 1.3002635250348784, "grad_norm": 0.41613009572029114, "learning_rate": 6.977913905527016e-06, "loss": 0.3433, "step": 2796 }, { "epoch": 1.3007285692140753, "grad_norm": 0.44297316670417786, "learning_rate": 6.975428401559392e-06, "loss": 0.3558, "step": 2797 }, { "epoch": 1.3011936133932724, "grad_norm": 0.3465144634246826, "learning_rate": 6.972942319020876e-06, "loss": 0.3213, "step": 2798 }, { "epoch": 1.3016586575724693, "grad_norm": 0.40050673484802246, "learning_rate": 6.9704556586396e-06, "loss": 0.3431, "step": 2799 }, { "epoch": 1.3021237017516665, "grad_norm": 0.45212817192077637, "learning_rate": 6.967968421143869e-06, "loss": 0.3546, "step": 2800 }, { "epoch": 1.3025887459308634, "grad_norm": 0.4688425064086914, "learning_rate": 6.965480607262154e-06, "loss": 0.3559, "step": 2801 }, { "epoch": 1.3030537901100605, "grad_norm": 0.4439482092857361, "learning_rate": 6.962992217723094e-06, "loss": 0.3234, "step": 2802 }, { "epoch": 1.3035188342892574, "grad_norm": 0.43428176641464233, "learning_rate": 6.960503253255499e-06, "loss": 0.3435, "step": 2803 }, { "epoch": 1.3039838784684545, "grad_norm": 0.3863378167152405, "learning_rate": 6.958013714588348e-06, "loss": 0.3557, "step": 2804 }, { "epoch": 1.3044489226476514, "grad_norm": 0.40679073333740234, "learning_rate": 6.95552360245078e-06, "loss": 0.3292, "step": 2805 }, { "epoch": 1.3049139668268486, "grad_norm": 0.45496124029159546, "learning_rate": 6.953032917572116e-06, "loss": 0.3961, "step": 2806 }, { "epoch": 1.3053790110060457, "grad_norm": 0.3845018446445465, "learning_rate": 6.950541660681833e-06, "loss": 0.3522, "step": 2807 }, { "epoch": 1.3058440551852426, "grad_norm": 0.4297010004520416, "learning_rate": 6.94804983250958e-06, "loss": 0.3595, "step": 2808 }, { "epoch": 1.3063090993644395, "grad_norm": 0.3819620609283447, "learning_rate": 6.945557433785175e-06, "loss": 0.3066, "step": 2809 }, { "epoch": 1.3067741435436366, "grad_norm": 0.42445626854896545, "learning_rate": 6.943064465238598e-06, "loss": 0.3727, "step": 2810 }, { "epoch": 1.3072391877228338, "grad_norm": 0.4176945686340332, "learning_rate": 6.940570927600001e-06, "loss": 0.3371, "step": 2811 }, { "epoch": 1.3077042319020307, "grad_norm": 0.4216483235359192, "learning_rate": 6.9380768215996995e-06, "loss": 0.337, "step": 2812 }, { "epoch": 1.3081692760812278, "grad_norm": 0.4133038818836212, "learning_rate": 6.93558214796818e-06, "loss": 0.3371, "step": 2813 }, { "epoch": 1.3086343202604247, "grad_norm": 0.47481316328048706, "learning_rate": 6.933086907436087e-06, "loss": 0.4007, "step": 2814 }, { "epoch": 1.3090993644396218, "grad_norm": 0.41611889004707336, "learning_rate": 6.93059110073424e-06, "loss": 0.3471, "step": 2815 }, { "epoch": 1.3095644086188187, "grad_norm": 0.45148274302482605, "learning_rate": 6.928094728593617e-06, "loss": 0.3308, "step": 2816 }, { "epoch": 1.3100294527980159, "grad_norm": 0.4194914400577545, "learning_rate": 6.9255977917453665e-06, "loss": 0.3708, "step": 2817 }, { "epoch": 1.3104944969772128, "grad_norm": 0.472514808177948, "learning_rate": 6.923100290920801e-06, "loss": 0.3499, "step": 2818 }, { "epoch": 1.31095954115641, "grad_norm": 0.4619661569595337, "learning_rate": 6.920602226851397e-06, "loss": 0.3644, "step": 2819 }, { "epoch": 1.3114245853356068, "grad_norm": 0.3975506126880646, "learning_rate": 6.9181036002687985e-06, "loss": 0.3624, "step": 2820 }, { "epoch": 1.311889629514804, "grad_norm": 0.48552754521369934, "learning_rate": 6.91560441190481e-06, "loss": 0.3961, "step": 2821 }, { "epoch": 1.312354673694001, "grad_norm": 0.4577177166938782, "learning_rate": 6.913104662491406e-06, "loss": 0.3915, "step": 2822 }, { "epoch": 1.312819717873198, "grad_norm": 0.47331976890563965, "learning_rate": 6.910604352760721e-06, "loss": 0.3035, "step": 2823 }, { "epoch": 1.3132847620523949, "grad_norm": 0.4498820900917053, "learning_rate": 6.908103483445056e-06, "loss": 0.3242, "step": 2824 }, { "epoch": 1.313749806231592, "grad_norm": 0.5082352757453918, "learning_rate": 6.905602055276872e-06, "loss": 0.3974, "step": 2825 }, { "epoch": 1.3142148504107891, "grad_norm": 0.4770672023296356, "learning_rate": 6.9031000689888015e-06, "loss": 0.3467, "step": 2826 }, { "epoch": 1.314679894589986, "grad_norm": 0.5072925090789795, "learning_rate": 6.9005975253136324e-06, "loss": 0.3451, "step": 2827 }, { "epoch": 1.3151449387691831, "grad_norm": 0.4119420051574707, "learning_rate": 6.898094424984319e-06, "loss": 0.3581, "step": 2828 }, { "epoch": 1.31560998294838, "grad_norm": 0.453228235244751, "learning_rate": 6.89559076873398e-06, "loss": 0.3201, "step": 2829 }, { "epoch": 1.3160750271275772, "grad_norm": 0.5295499563217163, "learning_rate": 6.893086557295896e-06, "loss": 0.387, "step": 2830 }, { "epoch": 1.316540071306774, "grad_norm": 0.428733229637146, "learning_rate": 6.890581791403509e-06, "loss": 0.3681, "step": 2831 }, { "epoch": 1.3170051154859712, "grad_norm": 0.4751463234424591, "learning_rate": 6.888076471790423e-06, "loss": 0.345, "step": 2832 }, { "epoch": 1.3174701596651681, "grad_norm": 0.44530734419822693, "learning_rate": 6.885570599190408e-06, "loss": 0.3325, "step": 2833 }, { "epoch": 1.3179352038443652, "grad_norm": 0.5325456857681274, "learning_rate": 6.88306417433739e-06, "loss": 0.3644, "step": 2834 }, { "epoch": 1.3184002480235621, "grad_norm": 0.3941551446914673, "learning_rate": 6.880557197965465e-06, "loss": 0.3159, "step": 2835 }, { "epoch": 1.3188652922027593, "grad_norm": 0.40330174565315247, "learning_rate": 6.878049670808882e-06, "loss": 0.3403, "step": 2836 }, { "epoch": 1.3193303363819564, "grad_norm": 0.5025704503059387, "learning_rate": 6.875541593602055e-06, "loss": 0.3683, "step": 2837 }, { "epoch": 1.3197953805611533, "grad_norm": 0.48347654938697815, "learning_rate": 6.873032967079562e-06, "loss": 0.3385, "step": 2838 }, { "epoch": 1.3202604247403502, "grad_norm": 0.45224472880363464, "learning_rate": 6.8705237919761344e-06, "loss": 0.4031, "step": 2839 }, { "epoch": 1.3207254689195473, "grad_norm": 0.4854915142059326, "learning_rate": 6.868014069026672e-06, "loss": 0.3297, "step": 2840 }, { "epoch": 1.3211905130987445, "grad_norm": 0.5176591873168945, "learning_rate": 6.865503798966232e-06, "loss": 0.3377, "step": 2841 }, { "epoch": 1.3216555572779414, "grad_norm": 0.43474873900413513, "learning_rate": 6.86299298253003e-06, "loss": 0.3378, "step": 2842 }, { "epoch": 1.3221206014571385, "grad_norm": 0.5134937167167664, "learning_rate": 6.860481620453445e-06, "loss": 0.3775, "step": 2843 }, { "epoch": 1.3225856456363354, "grad_norm": 0.47907009720802307, "learning_rate": 6.857969713472015e-06, "loss": 0.3326, "step": 2844 }, { "epoch": 1.3230506898155325, "grad_norm": 0.4050777554512024, "learning_rate": 6.855457262321433e-06, "loss": 0.3613, "step": 2845 }, { "epoch": 1.3235157339947294, "grad_norm": 0.4659248888492584, "learning_rate": 6.852944267737557e-06, "loss": 0.3669, "step": 2846 }, { "epoch": 1.3239807781739266, "grad_norm": 0.42381158471107483, "learning_rate": 6.850430730456403e-06, "loss": 0.37, "step": 2847 }, { "epoch": 1.3244458223531235, "grad_norm": 0.39930933713912964, "learning_rate": 6.847916651214146e-06, "loss": 0.3577, "step": 2848 }, { "epoch": 1.3249108665323206, "grad_norm": 0.3815561532974243, "learning_rate": 6.845402030747118e-06, "loss": 0.3432, "step": 2849 }, { "epoch": 1.3253759107115175, "grad_norm": 0.41214823722839355, "learning_rate": 6.84288686979181e-06, "loss": 0.3228, "step": 2850 }, { "epoch": 1.3258409548907146, "grad_norm": 0.41365644335746765, "learning_rate": 6.8403711690848715e-06, "loss": 0.3858, "step": 2851 }, { "epoch": 1.3263059990699118, "grad_norm": 0.4793543219566345, "learning_rate": 6.83785492936311e-06, "loss": 0.3466, "step": 2852 }, { "epoch": 1.3267710432491087, "grad_norm": 0.39055493474006653, "learning_rate": 6.8353381513634945e-06, "loss": 0.3124, "step": 2853 }, { "epoch": 1.3272360874283056, "grad_norm": 0.37652844190597534, "learning_rate": 6.832820835823145e-06, "loss": 0.3251, "step": 2854 }, { "epoch": 1.3277011316075027, "grad_norm": 0.4325464069843292, "learning_rate": 6.830302983479344e-06, "loss": 0.3776, "step": 2855 }, { "epoch": 1.3281661757866998, "grad_norm": 0.3975974917411804, "learning_rate": 6.827784595069529e-06, "loss": 0.3446, "step": 2856 }, { "epoch": 1.3286312199658967, "grad_norm": 0.3837142288684845, "learning_rate": 6.825265671331295e-06, "loss": 0.3613, "step": 2857 }, { "epoch": 1.3290962641450939, "grad_norm": 0.44566547870635986, "learning_rate": 6.822746213002393e-06, "loss": 0.3458, "step": 2858 }, { "epoch": 1.3295613083242908, "grad_norm": 0.38540929555892944, "learning_rate": 6.820226220820733e-06, "loss": 0.3582, "step": 2859 }, { "epoch": 1.3300263525034879, "grad_norm": 0.4004163444042206, "learning_rate": 6.8177056955243794e-06, "loss": 0.3327, "step": 2860 }, { "epoch": 1.3304913966826848, "grad_norm": 0.3982474207878113, "learning_rate": 6.815184637851553e-06, "loss": 0.3696, "step": 2861 }, { "epoch": 1.330956440861882, "grad_norm": 0.36453261971473694, "learning_rate": 6.812663048540631e-06, "loss": 0.3276, "step": 2862 }, { "epoch": 1.3314214850410788, "grad_norm": 0.41965192556381226, "learning_rate": 6.810140928330144e-06, "loss": 0.3681, "step": 2863 }, { "epoch": 1.331886529220276, "grad_norm": 0.4125366806983948, "learning_rate": 6.807618277958783e-06, "loss": 0.3285, "step": 2864 }, { "epoch": 1.3323515733994729, "grad_norm": 0.3705972731113434, "learning_rate": 6.805095098165388e-06, "loss": 0.3291, "step": 2865 }, { "epoch": 1.33281661757867, "grad_norm": 0.48080259561538696, "learning_rate": 6.8025713896889615e-06, "loss": 0.3379, "step": 2866 }, { "epoch": 1.333281661757867, "grad_norm": 0.39710700511932373, "learning_rate": 6.800047153268653e-06, "loss": 0.3732, "step": 2867 }, { "epoch": 1.333746705937064, "grad_norm": 0.3951427638530731, "learning_rate": 6.797522389643772e-06, "loss": 0.3569, "step": 2868 }, { "epoch": 1.334211750116261, "grad_norm": 0.33918821811676025, "learning_rate": 6.79499709955378e-06, "loss": 0.3214, "step": 2869 }, { "epoch": 1.334676794295458, "grad_norm": 0.39031147956848145, "learning_rate": 6.792471283738293e-06, "loss": 0.3722, "step": 2870 }, { "epoch": 1.3351418384746552, "grad_norm": 0.3802085518836975, "learning_rate": 6.789944942937084e-06, "loss": 0.3364, "step": 2871 }, { "epoch": 1.335606882653852, "grad_norm": 0.36190685629844666, "learning_rate": 6.787418077890076e-06, "loss": 0.3239, "step": 2872 }, { "epoch": 1.3360719268330492, "grad_norm": 0.4449096620082855, "learning_rate": 6.784890689337346e-06, "loss": 0.3719, "step": 2873 }, { "epoch": 1.3365369710122461, "grad_norm": 0.4153344929218292, "learning_rate": 6.782362778019125e-06, "loss": 0.3713, "step": 2874 }, { "epoch": 1.3370020151914432, "grad_norm": 0.3987843692302704, "learning_rate": 6.779834344675797e-06, "loss": 0.3889, "step": 2875 }, { "epoch": 1.3374670593706401, "grad_norm": 0.39704734086990356, "learning_rate": 6.7773053900478995e-06, "loss": 0.2937, "step": 2876 }, { "epoch": 1.3379321035498373, "grad_norm": 0.42171093821525574, "learning_rate": 6.774775914876123e-06, "loss": 0.3604, "step": 2877 }, { "epoch": 1.3383971477290342, "grad_norm": 0.42374035716056824, "learning_rate": 6.7722459199013095e-06, "loss": 0.3642, "step": 2878 }, { "epoch": 1.3388621919082313, "grad_norm": 0.43570584058761597, "learning_rate": 6.769715405864452e-06, "loss": 0.391, "step": 2879 }, { "epoch": 1.3393272360874282, "grad_norm": 0.361253947019577, "learning_rate": 6.767184373506698e-06, "loss": 0.3158, "step": 2880 }, { "epoch": 1.3397922802666253, "grad_norm": 0.46323809027671814, "learning_rate": 6.7646528235693445e-06, "loss": 0.337, "step": 2881 }, { "epoch": 1.3402573244458225, "grad_norm": 0.5015746355056763, "learning_rate": 6.762120756793844e-06, "loss": 0.3892, "step": 2882 }, { "epoch": 1.3407223686250194, "grad_norm": 0.43376925587654114, "learning_rate": 6.759588173921796e-06, "loss": 0.3749, "step": 2883 }, { "epoch": 1.3411874128042163, "grad_norm": 0.4221652150154114, "learning_rate": 6.757055075694954e-06, "loss": 0.3531, "step": 2884 }, { "epoch": 1.3416524569834134, "grad_norm": 0.49050408601760864, "learning_rate": 6.754521462855219e-06, "loss": 0.3557, "step": 2885 }, { "epoch": 1.3421175011626105, "grad_norm": 0.4941301941871643, "learning_rate": 6.7519873361446475e-06, "loss": 0.3587, "step": 2886 }, { "epoch": 1.3425825453418074, "grad_norm": 0.378880113363266, "learning_rate": 6.749452696305442e-06, "loss": 0.3779, "step": 2887 }, { "epoch": 1.3430475895210046, "grad_norm": 0.41799113154411316, "learning_rate": 6.74691754407996e-06, "loss": 0.339, "step": 2888 }, { "epoch": 1.3435126337002015, "grad_norm": 0.39678072929382324, "learning_rate": 6.744381880210703e-06, "loss": 0.3487, "step": 2889 }, { "epoch": 1.3439776778793986, "grad_norm": 0.41609761118888855, "learning_rate": 6.741845705440329e-06, "loss": 0.3937, "step": 2890 }, { "epoch": 1.3444427220585955, "grad_norm": 0.4008464217185974, "learning_rate": 6.739309020511641e-06, "loss": 0.3155, "step": 2891 }, { "epoch": 1.3449077662377926, "grad_norm": 0.45266151428222656, "learning_rate": 6.736771826167592e-06, "loss": 0.3634, "step": 2892 }, { "epoch": 1.3453728104169898, "grad_norm": 0.423709511756897, "learning_rate": 6.734234123151284e-06, "loss": 0.3839, "step": 2893 }, { "epoch": 1.3458378545961867, "grad_norm": 0.4130350649356842, "learning_rate": 6.731695912205974e-06, "loss": 0.3357, "step": 2894 }, { "epoch": 1.3463028987753836, "grad_norm": 0.38522496819496155, "learning_rate": 6.7291571940750575e-06, "loss": 0.316, "step": 2895 }, { "epoch": 1.3467679429545807, "grad_norm": 0.47391700744628906, "learning_rate": 6.726617969502088e-06, "loss": 0.3453, "step": 2896 }, { "epoch": 1.3472329871337778, "grad_norm": 0.3721844255924225, "learning_rate": 6.724078239230758e-06, "loss": 0.3274, "step": 2897 }, { "epoch": 1.3476980313129747, "grad_norm": 0.45813339948654175, "learning_rate": 6.721538004004918e-06, "loss": 0.3747, "step": 2898 }, { "epoch": 1.3481630754921716, "grad_norm": 0.44626984000205994, "learning_rate": 6.71899726456856e-06, "loss": 0.3592, "step": 2899 }, { "epoch": 1.3486281196713688, "grad_norm": 0.4165315330028534, "learning_rate": 6.716456021665825e-06, "loss": 0.3405, "step": 2900 }, { "epoch": 1.3490931638505659, "grad_norm": 0.40565526485443115, "learning_rate": 6.713914276041001e-06, "loss": 0.351, "step": 2901 }, { "epoch": 1.3495582080297628, "grad_norm": 0.4150947332382202, "learning_rate": 6.7113720284385255e-06, "loss": 0.3609, "step": 2902 }, { "epoch": 1.35002325220896, "grad_norm": 0.35937878489494324, "learning_rate": 6.708829279602982e-06, "loss": 0.3121, "step": 2903 }, { "epoch": 1.3504882963881568, "grad_norm": 0.4622684121131897, "learning_rate": 6.7062860302790965e-06, "loss": 0.3757, "step": 2904 }, { "epoch": 1.350953340567354, "grad_norm": 0.3929741680622101, "learning_rate": 6.70374228121175e-06, "loss": 0.3468, "step": 2905 }, { "epoch": 1.3514183847465508, "grad_norm": 0.3833123743534088, "learning_rate": 6.7011980331459635e-06, "loss": 0.3254, "step": 2906 }, { "epoch": 1.351883428925748, "grad_norm": 0.4471578598022461, "learning_rate": 6.698653286826906e-06, "loss": 0.3819, "step": 2907 }, { "epoch": 1.352348473104945, "grad_norm": 0.42805638909339905, "learning_rate": 6.696108042999892e-06, "loss": 0.3425, "step": 2908 }, { "epoch": 1.352813517284142, "grad_norm": 0.40058985352516174, "learning_rate": 6.693562302410384e-06, "loss": 0.3844, "step": 2909 }, { "epoch": 1.353278561463339, "grad_norm": 0.417976051568985, "learning_rate": 6.6910160658039835e-06, "loss": 0.3821, "step": 2910 }, { "epoch": 1.353743605642536, "grad_norm": 0.36969244480133057, "learning_rate": 6.6884693339264466e-06, "loss": 0.3038, "step": 2911 }, { "epoch": 1.3542086498217332, "grad_norm": 0.36144527792930603, "learning_rate": 6.685922107523667e-06, "loss": 0.3405, "step": 2912 }, { "epoch": 1.35467369400093, "grad_norm": 0.4299263060092926, "learning_rate": 6.683374387341688e-06, "loss": 0.3538, "step": 2913 }, { "epoch": 1.355138738180127, "grad_norm": 0.41073358058929443, "learning_rate": 6.680826174126693e-06, "loss": 0.3451, "step": 2914 }, { "epoch": 1.355603782359324, "grad_norm": 0.42276236414909363, "learning_rate": 6.678277468625014e-06, "loss": 0.3641, "step": 2915 }, { "epoch": 1.3560688265385212, "grad_norm": 0.39464181661605835, "learning_rate": 6.675728271583124e-06, "loss": 0.3313, "step": 2916 }, { "epoch": 1.3565338707177181, "grad_norm": 0.3857058882713318, "learning_rate": 6.673178583747644e-06, "loss": 0.3363, "step": 2917 }, { "epoch": 1.3569989148969153, "grad_norm": 0.40449684858322144, "learning_rate": 6.670628405865334e-06, "loss": 0.3499, "step": 2918 }, { "epoch": 1.3574639590761122, "grad_norm": 0.43879038095474243, "learning_rate": 6.6680777386831e-06, "loss": 0.3781, "step": 2919 }, { "epoch": 1.3579290032553093, "grad_norm": 0.4013797640800476, "learning_rate": 6.665526582947991e-06, "loss": 0.3759, "step": 2920 }, { "epoch": 1.3583940474345062, "grad_norm": 0.5068005323410034, "learning_rate": 6.6629749394071995e-06, "loss": 0.3319, "step": 2921 }, { "epoch": 1.3588590916137033, "grad_norm": 0.43432286381721497, "learning_rate": 6.6604228088080605e-06, "loss": 0.3545, "step": 2922 }, { "epoch": 1.3593241357929005, "grad_norm": 0.5053911805152893, "learning_rate": 6.657870191898051e-06, "loss": 0.3545, "step": 2923 }, { "epoch": 1.3597891799720974, "grad_norm": 0.37629908323287964, "learning_rate": 6.655317089424791e-06, "loss": 0.3084, "step": 2924 }, { "epoch": 1.3602542241512943, "grad_norm": 0.40685826539993286, "learning_rate": 6.652763502136044e-06, "loss": 0.3445, "step": 2925 }, { "epoch": 1.3607192683304914, "grad_norm": 0.46807217597961426, "learning_rate": 6.6502094307797124e-06, "loss": 0.3464, "step": 2926 }, { "epoch": 1.3611843125096885, "grad_norm": 0.43712517619132996, "learning_rate": 6.647654876103844e-06, "loss": 0.3325, "step": 2927 }, { "epoch": 1.3616493566888854, "grad_norm": 0.5149297118186951, "learning_rate": 6.645099838856624e-06, "loss": 0.405, "step": 2928 }, { "epoch": 1.3621144008680823, "grad_norm": 0.41195258498191833, "learning_rate": 6.6425443197863836e-06, "loss": 0.3373, "step": 2929 }, { "epoch": 1.3625794450472795, "grad_norm": 0.5708654522895813, "learning_rate": 6.639988319641592e-06, "loss": 0.3448, "step": 2930 }, { "epoch": 1.3630444892264766, "grad_norm": 0.45768019556999207, "learning_rate": 6.637431839170861e-06, "loss": 0.3502, "step": 2931 }, { "epoch": 1.3635095334056735, "grad_norm": 0.42099401354789734, "learning_rate": 6.6348748791229416e-06, "loss": 0.3665, "step": 2932 }, { "epoch": 1.3639745775848706, "grad_norm": 0.45441335439682007, "learning_rate": 6.632317440246725e-06, "loss": 0.3322, "step": 2933 }, { "epoch": 1.3644396217640675, "grad_norm": 0.5002641677856445, "learning_rate": 6.629759523291242e-06, "loss": 0.3342, "step": 2934 }, { "epoch": 1.3649046659432646, "grad_norm": 0.4336529076099396, "learning_rate": 6.627201129005672e-06, "loss": 0.3839, "step": 2935 }, { "epoch": 1.3653697101224616, "grad_norm": 0.47555792331695557, "learning_rate": 6.624642258139318e-06, "loss": 0.3358, "step": 2936 }, { "epoch": 1.3658347543016587, "grad_norm": 0.47870880365371704, "learning_rate": 6.622082911441637e-06, "loss": 0.3573, "step": 2937 }, { "epoch": 1.3662997984808558, "grad_norm": 0.44583725929260254, "learning_rate": 6.619523089662219e-06, "loss": 0.3536, "step": 2938 }, { "epoch": 1.3667648426600527, "grad_norm": 0.44511839747428894, "learning_rate": 6.616962793550794e-06, "loss": 0.3496, "step": 2939 }, { "epoch": 1.3672298868392496, "grad_norm": 0.4455788731575012, "learning_rate": 6.614402023857231e-06, "loss": 0.3345, "step": 2940 }, { "epoch": 1.3676949310184467, "grad_norm": 0.42460909485816956, "learning_rate": 6.61184078133154e-06, "loss": 0.3473, "step": 2941 }, { "epoch": 1.3681599751976439, "grad_norm": 0.44328975677490234, "learning_rate": 6.609279066723865e-06, "loss": 0.3733, "step": 2942 }, { "epoch": 1.3686250193768408, "grad_norm": 0.4914466142654419, "learning_rate": 6.606716880784491e-06, "loss": 0.373, "step": 2943 }, { "epoch": 1.369090063556038, "grad_norm": 0.4081343710422516, "learning_rate": 6.604154224263839e-06, "loss": 0.3409, "step": 2944 }, { "epoch": 1.3695551077352348, "grad_norm": 0.44254809617996216, "learning_rate": 6.601591097912472e-06, "loss": 0.3659, "step": 2945 }, { "epoch": 1.370020151914432, "grad_norm": 0.4107268452644348, "learning_rate": 6.599027502481088e-06, "loss": 0.3359, "step": 2946 }, { "epoch": 1.3704851960936288, "grad_norm": 0.4272344410419464, "learning_rate": 6.596463438720522e-06, "loss": 0.3476, "step": 2947 }, { "epoch": 1.370950240272826, "grad_norm": 0.47288623452186584, "learning_rate": 6.593898907381746e-06, "loss": 0.3282, "step": 2948 }, { "epoch": 1.3714152844520229, "grad_norm": 0.4827193319797516, "learning_rate": 6.59133390921587e-06, "loss": 0.3738, "step": 2949 }, { "epoch": 1.37188032863122, "grad_norm": 0.4004787504673004, "learning_rate": 6.588768444974141e-06, "loss": 0.3441, "step": 2950 }, { "epoch": 1.372345372810417, "grad_norm": 0.44598275423049927, "learning_rate": 6.58620251540794e-06, "loss": 0.3349, "step": 2951 }, { "epoch": 1.372810416989614, "grad_norm": 0.49700048565864563, "learning_rate": 6.583636121268787e-06, "loss": 0.3879, "step": 2952 }, { "epoch": 1.3732754611688112, "grad_norm": 0.46100863814353943, "learning_rate": 6.581069263308338e-06, "loss": 0.3708, "step": 2953 }, { "epoch": 1.373740505348008, "grad_norm": 0.47054529190063477, "learning_rate": 6.5785019422783836e-06, "loss": 0.352, "step": 2954 }, { "epoch": 1.374205549527205, "grad_norm": 0.4136134386062622, "learning_rate": 6.57593415893085e-06, "loss": 0.3491, "step": 2955 }, { "epoch": 1.374670593706402, "grad_norm": 0.49520325660705566, "learning_rate": 6.5733659140178e-06, "loss": 0.3456, "step": 2956 }, { "epoch": 1.3751356378855992, "grad_norm": 0.3895581066608429, "learning_rate": 6.5707972082914275e-06, "loss": 0.3026, "step": 2957 }, { "epoch": 1.3756006820647961, "grad_norm": 0.4688425660133362, "learning_rate": 6.56822804250407e-06, "loss": 0.3848, "step": 2958 }, { "epoch": 1.3760657262439933, "grad_norm": 0.45133891701698303, "learning_rate": 6.56565841740819e-06, "loss": 0.3607, "step": 2959 }, { "epoch": 1.3765307704231902, "grad_norm": 0.49955958127975464, "learning_rate": 6.5630883337563935e-06, "loss": 0.3493, "step": 2960 }, { "epoch": 1.3769958146023873, "grad_norm": 0.43193817138671875, "learning_rate": 6.560517792301412e-06, "loss": 0.3393, "step": 2961 }, { "epoch": 1.3774608587815842, "grad_norm": 0.4623286724090576, "learning_rate": 6.557946793796116e-06, "loss": 0.367, "step": 2962 }, { "epoch": 1.3779259029607813, "grad_norm": 0.44610491394996643, "learning_rate": 6.55537533899351e-06, "loss": 0.3393, "step": 2963 }, { "epoch": 1.3783909471399782, "grad_norm": 0.4373582601547241, "learning_rate": 6.552803428646732e-06, "loss": 0.3219, "step": 2964 }, { "epoch": 1.3788559913191754, "grad_norm": 0.4308757185935974, "learning_rate": 6.550231063509054e-06, "loss": 0.3711, "step": 2965 }, { "epoch": 1.3793210354983723, "grad_norm": 0.42997416853904724, "learning_rate": 6.547658244333876e-06, "loss": 0.3208, "step": 2966 }, { "epoch": 1.3797860796775694, "grad_norm": 0.4055032432079315, "learning_rate": 6.545084971874738e-06, "loss": 0.3543, "step": 2967 }, { "epoch": 1.3802511238567665, "grad_norm": 0.39875516295433044, "learning_rate": 6.542511246885308e-06, "loss": 0.3392, "step": 2968 }, { "epoch": 1.3807161680359634, "grad_norm": 0.4401739239692688, "learning_rate": 6.539937070119389e-06, "loss": 0.3887, "step": 2969 }, { "epoch": 1.3811812122151603, "grad_norm": 0.38288089632987976, "learning_rate": 6.5373624423309165e-06, "loss": 0.3448, "step": 2970 }, { "epoch": 1.3816462563943575, "grad_norm": 0.44091928005218506, "learning_rate": 6.534787364273957e-06, "loss": 0.3297, "step": 2971 }, { "epoch": 1.3821113005735546, "grad_norm": 0.5266174674034119, "learning_rate": 6.532211836702708e-06, "loss": 0.3979, "step": 2972 }, { "epoch": 1.3825763447527515, "grad_norm": 0.40957939624786377, "learning_rate": 6.529635860371497e-06, "loss": 0.3353, "step": 2973 }, { "epoch": 1.3830413889319486, "grad_norm": 0.4384903311729431, "learning_rate": 6.527059436034791e-06, "loss": 0.3386, "step": 2974 }, { "epoch": 1.3835064331111455, "grad_norm": 0.3974456787109375, "learning_rate": 6.524482564447181e-06, "loss": 0.3046, "step": 2975 }, { "epoch": 1.3839714772903426, "grad_norm": 0.43884211778640747, "learning_rate": 6.521905246363389e-06, "loss": 0.412, "step": 2976 }, { "epoch": 1.3844365214695395, "grad_norm": 0.4392377436161041, "learning_rate": 6.5193274825382724e-06, "loss": 0.3413, "step": 2977 }, { "epoch": 1.3849015656487367, "grad_norm": 0.4340946674346924, "learning_rate": 6.516749273726814e-06, "loss": 0.3686, "step": 2978 }, { "epoch": 1.3853666098279336, "grad_norm": 0.3607385456562042, "learning_rate": 6.514170620684128e-06, "loss": 0.3132, "step": 2979 }, { "epoch": 1.3858316540071307, "grad_norm": 0.5139884352684021, "learning_rate": 6.511591524165465e-06, "loss": 0.3757, "step": 2980 }, { "epoch": 1.3862966981863276, "grad_norm": 0.43492573499679565, "learning_rate": 6.509011984926197e-06, "loss": 0.3282, "step": 2981 }, { "epoch": 1.3867617423655247, "grad_norm": 0.3985266089439392, "learning_rate": 6.50643200372183e-06, "loss": 0.3391, "step": 2982 }, { "epoch": 1.3872267865447219, "grad_norm": 0.5525508522987366, "learning_rate": 6.503851581307997e-06, "loss": 0.4244, "step": 2983 }, { "epoch": 1.3876918307239188, "grad_norm": 0.3948424756526947, "learning_rate": 6.5012707184404645e-06, "loss": 0.3381, "step": 2984 }, { "epoch": 1.3881568749031157, "grad_norm": 0.4646177589893341, "learning_rate": 6.498689415875121e-06, "loss": 0.3424, "step": 2985 }, { "epoch": 1.3886219190823128, "grad_norm": 0.4283245801925659, "learning_rate": 6.496107674367994e-06, "loss": 0.3178, "step": 2986 }, { "epoch": 1.38908696326151, "grad_norm": 0.45199066400527954, "learning_rate": 6.49352549467523e-06, "loss": 0.3601, "step": 2987 }, { "epoch": 1.3895520074407068, "grad_norm": 0.3850160837173462, "learning_rate": 6.4909428775531095e-06, "loss": 0.3472, "step": 2988 }, { "epoch": 1.390017051619904, "grad_norm": 0.4370267391204834, "learning_rate": 6.488359823758036e-06, "loss": 0.3827, "step": 2989 }, { "epoch": 1.3904820957991009, "grad_norm": 0.4112876355648041, "learning_rate": 6.485776334046546e-06, "loss": 0.3564, "step": 2990 }, { "epoch": 1.390947139978298, "grad_norm": 0.4564457833766937, "learning_rate": 6.483192409175301e-06, "loss": 0.3507, "step": 2991 }, { "epoch": 1.391412184157495, "grad_norm": 0.4476487636566162, "learning_rate": 6.4806080499010916e-06, "loss": 0.3607, "step": 2992 }, { "epoch": 1.391877228336692, "grad_norm": 0.4968775808811188, "learning_rate": 6.478023256980835e-06, "loss": 0.3816, "step": 2993 }, { "epoch": 1.392342272515889, "grad_norm": 0.38123828172683716, "learning_rate": 6.475438031171574e-06, "loss": 0.3263, "step": 2994 }, { "epoch": 1.392807316695086, "grad_norm": 0.46535784006118774, "learning_rate": 6.472852373230478e-06, "loss": 0.3571, "step": 2995 }, { "epoch": 1.393272360874283, "grad_norm": 0.6109573841094971, "learning_rate": 6.4702662839148476e-06, "loss": 0.3365, "step": 2996 }, { "epoch": 1.39373740505348, "grad_norm": 0.41744059324264526, "learning_rate": 6.467679763982103e-06, "loss": 0.3295, "step": 2997 }, { "epoch": 1.3942024492326772, "grad_norm": 0.3972165286540985, "learning_rate": 6.465092814189795e-06, "loss": 0.3273, "step": 2998 }, { "epoch": 1.3946674934118741, "grad_norm": 0.5437119007110596, "learning_rate": 6.462505435295601e-06, "loss": 0.3738, "step": 2999 }, { "epoch": 1.395132537591071, "grad_norm": 0.4269973337650299, "learning_rate": 6.459917628057319e-06, "loss": 0.321, "step": 3000 }, { "epoch": 1.3955975817702682, "grad_norm": 0.46754154562950134, "learning_rate": 6.457329393232878e-06, "loss": 0.3345, "step": 3001 }, { "epoch": 1.3960626259494653, "grad_norm": 0.5135738253593445, "learning_rate": 6.454740731580331e-06, "loss": 0.3541, "step": 3002 }, { "epoch": 1.3965276701286622, "grad_norm": 0.5451841950416565, "learning_rate": 6.452151643857851e-06, "loss": 0.3808, "step": 3003 }, { "epoch": 1.3969927143078593, "grad_norm": 0.5181593298912048, "learning_rate": 6.4495621308237435e-06, "loss": 0.348, "step": 3004 }, { "epoch": 1.3974577584870562, "grad_norm": 0.5118134617805481, "learning_rate": 6.446972193236433e-06, "loss": 0.3291, "step": 3005 }, { "epoch": 1.3979228026662534, "grad_norm": 0.48063331842422485, "learning_rate": 6.444381831854469e-06, "loss": 0.3796, "step": 3006 }, { "epoch": 1.3983878468454503, "grad_norm": 0.4797370433807373, "learning_rate": 6.44179104743653e-06, "loss": 0.3617, "step": 3007 }, { "epoch": 1.3988528910246474, "grad_norm": 0.4799365699291229, "learning_rate": 6.439199840741412e-06, "loss": 0.3273, "step": 3008 }, { "epoch": 1.3993179352038443, "grad_norm": 0.4450487494468689, "learning_rate": 6.436608212528037e-06, "loss": 0.3871, "step": 3009 }, { "epoch": 1.3997829793830414, "grad_norm": 0.4422202706336975, "learning_rate": 6.434016163555452e-06, "loss": 0.3508, "step": 3010 }, { "epoch": 1.4002480235622383, "grad_norm": 0.4393402636051178, "learning_rate": 6.431423694582825e-06, "loss": 0.3253, "step": 3011 }, { "epoch": 1.4007130677414354, "grad_norm": 0.4574717879295349, "learning_rate": 6.428830806369451e-06, "loss": 0.3499, "step": 3012 }, { "epoch": 1.4011781119206326, "grad_norm": 0.4212042987346649, "learning_rate": 6.42623749967474e-06, "loss": 0.353, "step": 3013 }, { "epoch": 1.4016431560998295, "grad_norm": 0.3749128580093384, "learning_rate": 6.423643775258232e-06, "loss": 0.3304, "step": 3014 }, { "epoch": 1.4021082002790264, "grad_norm": 0.4546009302139282, "learning_rate": 6.421049633879588e-06, "loss": 0.3276, "step": 3015 }, { "epoch": 1.4025732444582235, "grad_norm": 0.4350774586200714, "learning_rate": 6.418455076298587e-06, "loss": 0.3725, "step": 3016 }, { "epoch": 1.4030382886374206, "grad_norm": 0.4265797436237335, "learning_rate": 6.415860103275136e-06, "loss": 0.3663, "step": 3017 }, { "epoch": 1.4035033328166175, "grad_norm": 0.3795028030872345, "learning_rate": 6.413264715569259e-06, "loss": 0.3185, "step": 3018 }, { "epoch": 1.4039683769958147, "grad_norm": 0.39534875750541687, "learning_rate": 6.4106689139411015e-06, "loss": 0.3468, "step": 3019 }, { "epoch": 1.4044334211750116, "grad_norm": 0.3997578024864197, "learning_rate": 6.408072699150933e-06, "loss": 0.371, "step": 3020 }, { "epoch": 1.4048984653542087, "grad_norm": 0.3619006872177124, "learning_rate": 6.405476071959142e-06, "loss": 0.3284, "step": 3021 }, { "epoch": 1.4053635095334056, "grad_norm": 0.41694512963294983, "learning_rate": 6.402879033126239e-06, "loss": 0.3559, "step": 3022 }, { "epoch": 1.4058285537126027, "grad_norm": 0.41265085339546204, "learning_rate": 6.400281583412855e-06, "loss": 0.3171, "step": 3023 }, { "epoch": 1.4062935978917996, "grad_norm": 0.5016796588897705, "learning_rate": 6.397683723579741e-06, "loss": 0.3731, "step": 3024 }, { "epoch": 1.4067586420709968, "grad_norm": 0.39977139234542847, "learning_rate": 6.395085454387766e-06, "loss": 0.3065, "step": 3025 }, { "epoch": 1.4072236862501937, "grad_norm": 0.41781142354011536, "learning_rate": 6.392486776597921e-06, "loss": 0.3491, "step": 3026 }, { "epoch": 1.4076887304293908, "grad_norm": 0.4563649594783783, "learning_rate": 6.389887690971319e-06, "loss": 0.3464, "step": 3027 }, { "epoch": 1.408153774608588, "grad_norm": 0.4177623987197876, "learning_rate": 6.387288198269189e-06, "loss": 0.3782, "step": 3028 }, { "epoch": 1.4086188187877848, "grad_norm": 0.3898913264274597, "learning_rate": 6.384688299252879e-06, "loss": 0.3564, "step": 3029 }, { "epoch": 1.4090838629669817, "grad_norm": 0.3614499568939209, "learning_rate": 6.3820879946838585e-06, "loss": 0.3215, "step": 3030 }, { "epoch": 1.4095489071461789, "grad_norm": 0.45202839374542236, "learning_rate": 6.379487285323713e-06, "loss": 0.3829, "step": 3031 }, { "epoch": 1.410013951325376, "grad_norm": 0.39168378710746765, "learning_rate": 6.3768861719341475e-06, "loss": 0.3619, "step": 3032 }, { "epoch": 1.410478995504573, "grad_norm": 0.3808189630508423, "learning_rate": 6.374284655276989e-06, "loss": 0.3376, "step": 3033 }, { "epoch": 1.41094403968377, "grad_norm": 0.43050652742385864, "learning_rate": 6.371682736114178e-06, "loss": 0.3206, "step": 3034 }, { "epoch": 1.411409083862967, "grad_norm": 0.4489372968673706, "learning_rate": 6.369080415207773e-06, "loss": 0.3917, "step": 3035 }, { "epoch": 1.411874128042164, "grad_norm": 0.37065914273262024, "learning_rate": 6.366477693319953e-06, "loss": 0.3446, "step": 3036 }, { "epoch": 1.412339172221361, "grad_norm": 0.3754143714904785, "learning_rate": 6.363874571213013e-06, "loss": 0.356, "step": 3037 }, { "epoch": 1.412804216400558, "grad_norm": 0.3956548273563385, "learning_rate": 6.361271049649363e-06, "loss": 0.3558, "step": 3038 }, { "epoch": 1.4132692605797552, "grad_norm": 0.4007726013660431, "learning_rate": 6.358667129391536e-06, "loss": 0.3204, "step": 3039 }, { "epoch": 1.4137343047589521, "grad_norm": 0.42144933342933655, "learning_rate": 6.356062811202175e-06, "loss": 0.3674, "step": 3040 }, { "epoch": 1.414199348938149, "grad_norm": 0.4103334844112396, "learning_rate": 6.3534580958440425e-06, "loss": 0.3627, "step": 3041 }, { "epoch": 1.4146643931173462, "grad_norm": 0.34175369143486023, "learning_rate": 6.35085298408002e-06, "loss": 0.3123, "step": 3042 }, { "epoch": 1.4151294372965433, "grad_norm": 0.38748347759246826, "learning_rate": 6.348247476673099e-06, "loss": 0.3575, "step": 3043 }, { "epoch": 1.4155944814757402, "grad_norm": 0.413284569978714, "learning_rate": 6.345641574386393e-06, "loss": 0.3341, "step": 3044 }, { "epoch": 1.416059525654937, "grad_norm": 0.37702399492263794, "learning_rate": 6.3430352779831275e-06, "loss": 0.3339, "step": 3045 }, { "epoch": 1.4165245698341342, "grad_norm": 0.4544568955898285, "learning_rate": 6.340428588226643e-06, "loss": 0.3564, "step": 3046 }, { "epoch": 1.4169896140133313, "grad_norm": 0.3882623016834259, "learning_rate": 6.337821505880399e-06, "loss": 0.3699, "step": 3047 }, { "epoch": 1.4174546581925282, "grad_norm": 0.3817591369152069, "learning_rate": 6.335214031707966e-06, "loss": 0.3165, "step": 3048 }, { "epoch": 1.4179197023717254, "grad_norm": 0.39398717880249023, "learning_rate": 6.33260616647303e-06, "loss": 0.3486, "step": 3049 }, { "epoch": 1.4183847465509223, "grad_norm": 0.4238983988761902, "learning_rate": 6.329997910939394e-06, "loss": 0.3892, "step": 3050 }, { "epoch": 1.4188497907301194, "grad_norm": 0.43442922830581665, "learning_rate": 6.327389265870974e-06, "loss": 0.3235, "step": 3051 }, { "epoch": 1.4193148349093163, "grad_norm": 0.4579649865627289, "learning_rate": 6.324780232031799e-06, "loss": 0.3464, "step": 3052 }, { "epoch": 1.4197798790885134, "grad_norm": 0.3303820490837097, "learning_rate": 6.322170810186013e-06, "loss": 0.285, "step": 3053 }, { "epoch": 1.4202449232677106, "grad_norm": 0.4255482256412506, "learning_rate": 6.319561001097871e-06, "loss": 0.3688, "step": 3054 }, { "epoch": 1.4207099674469075, "grad_norm": 0.46313193440437317, "learning_rate": 6.316950805531746e-06, "loss": 0.3576, "step": 3055 }, { "epoch": 1.4211750116261044, "grad_norm": 0.412056565284729, "learning_rate": 6.314340224252124e-06, "loss": 0.3525, "step": 3056 }, { "epoch": 1.4216400558053015, "grad_norm": 0.4473661184310913, "learning_rate": 6.311729258023597e-06, "loss": 0.3644, "step": 3057 }, { "epoch": 1.4221050999844986, "grad_norm": 0.3638128638267517, "learning_rate": 6.309117907610878e-06, "loss": 0.3013, "step": 3058 }, { "epoch": 1.4225701441636955, "grad_norm": 0.471097856760025, "learning_rate": 6.306506173778788e-06, "loss": 0.3941, "step": 3059 }, { "epoch": 1.4230351883428924, "grad_norm": 0.42378881573677063, "learning_rate": 6.303894057292261e-06, "loss": 0.3553, "step": 3060 }, { "epoch": 1.4235002325220896, "grad_norm": 0.44823458790779114, "learning_rate": 6.3012815589163435e-06, "loss": 0.3624, "step": 3061 }, { "epoch": 1.4239652767012867, "grad_norm": 0.39349788427352905, "learning_rate": 6.2986686794161955e-06, "loss": 0.335, "step": 3062 }, { "epoch": 1.4244303208804836, "grad_norm": 0.39323508739471436, "learning_rate": 6.296055419557086e-06, "loss": 0.3689, "step": 3063 }, { "epoch": 1.4248953650596807, "grad_norm": 0.39461150765419006, "learning_rate": 6.293441780104394e-06, "loss": 0.351, "step": 3064 }, { "epoch": 1.4253604092388776, "grad_norm": 0.3934915065765381, "learning_rate": 6.290827761823617e-06, "loss": 0.3515, "step": 3065 }, { "epoch": 1.4258254534180748, "grad_norm": 0.407262921333313, "learning_rate": 6.2882133654803535e-06, "loss": 0.3113, "step": 3066 }, { "epoch": 1.4262904975972717, "grad_norm": 0.4266803562641144, "learning_rate": 6.28559859184032e-06, "loss": 0.3751, "step": 3067 }, { "epoch": 1.4267555417764688, "grad_norm": 0.39150920510292053, "learning_rate": 6.282983441669343e-06, "loss": 0.3722, "step": 3068 }, { "epoch": 1.427220585955666, "grad_norm": 0.42089784145355225, "learning_rate": 6.280367915733354e-06, "loss": 0.3574, "step": 3069 }, { "epoch": 1.4276856301348628, "grad_norm": 0.3548668324947357, "learning_rate": 6.277752014798401e-06, "loss": 0.3241, "step": 3070 }, { "epoch": 1.4281506743140597, "grad_norm": 0.4519556164741516, "learning_rate": 6.275135739630636e-06, "loss": 0.3671, "step": 3071 }, { "epoch": 1.4286157184932569, "grad_norm": 0.4052821695804596, "learning_rate": 6.272519090996326e-06, "loss": 0.3497, "step": 3072 }, { "epoch": 1.429080762672454, "grad_norm": 0.4167952835559845, "learning_rate": 6.269902069661843e-06, "loss": 0.3615, "step": 3073 }, { "epoch": 1.429545806851651, "grad_norm": 0.4397052526473999, "learning_rate": 6.267284676393672e-06, "loss": 0.3694, "step": 3074 }, { "epoch": 1.4300108510308478, "grad_norm": 0.42949673533439636, "learning_rate": 6.264666911958404e-06, "loss": 0.329, "step": 3075 }, { "epoch": 1.430475895210045, "grad_norm": 0.39663806557655334, "learning_rate": 6.26204877712274e-06, "loss": 0.3553, "step": 3076 }, { "epoch": 1.430940939389242, "grad_norm": 0.3915885388851166, "learning_rate": 6.259430272653489e-06, "loss": 0.349, "step": 3077 }, { "epoch": 1.431405983568439, "grad_norm": 0.4367777705192566, "learning_rate": 6.256811399317567e-06, "loss": 0.343, "step": 3078 }, { "epoch": 1.431871027747636, "grad_norm": 0.4142322242259979, "learning_rate": 6.254192157882002e-06, "loss": 0.3547, "step": 3079 }, { "epoch": 1.432336071926833, "grad_norm": 0.41435766220092773, "learning_rate": 6.251572549113925e-06, "loss": 0.3363, "step": 3080 }, { "epoch": 1.4328011161060301, "grad_norm": 0.3753543496131897, "learning_rate": 6.248952573780578e-06, "loss": 0.3099, "step": 3081 }, { "epoch": 1.433266160285227, "grad_norm": 0.4042842388153076, "learning_rate": 6.246332232649309e-06, "loss": 0.3347, "step": 3082 }, { "epoch": 1.4337312044644241, "grad_norm": 0.48114094138145447, "learning_rate": 6.243711526487575e-06, "loss": 0.4166, "step": 3083 }, { "epoch": 1.4341962486436213, "grad_norm": 0.36960569024086, "learning_rate": 6.241090456062934e-06, "loss": 0.3309, "step": 3084 }, { "epoch": 1.4346612928228182, "grad_norm": 0.4094170928001404, "learning_rate": 6.238469022143059e-06, "loss": 0.3325, "step": 3085 }, { "epoch": 1.435126337002015, "grad_norm": 0.418476402759552, "learning_rate": 6.235847225495724e-06, "loss": 0.3791, "step": 3086 }, { "epoch": 1.4355913811812122, "grad_norm": 0.3743997812271118, "learning_rate": 6.23322506688881e-06, "loss": 0.3432, "step": 3087 }, { "epoch": 1.4360564253604093, "grad_norm": 0.44286203384399414, "learning_rate": 6.230602547090307e-06, "loss": 0.3745, "step": 3088 }, { "epoch": 1.4365214695396062, "grad_norm": 0.40265190601348877, "learning_rate": 6.227979666868307e-06, "loss": 0.3441, "step": 3089 }, { "epoch": 1.4369865137188031, "grad_norm": 0.4036969244480133, "learning_rate": 6.225356426991007e-06, "loss": 0.3609, "step": 3090 }, { "epoch": 1.4374515578980003, "grad_norm": 0.3730718493461609, "learning_rate": 6.222732828226714e-06, "loss": 0.3337, "step": 3091 }, { "epoch": 1.4379166020771974, "grad_norm": 0.4534870684146881, "learning_rate": 6.2201088713438366e-06, "loss": 0.369, "step": 3092 }, { "epoch": 1.4383816462563943, "grad_norm": 0.4100678563117981, "learning_rate": 6.2174845571108884e-06, "loss": 0.3617, "step": 3093 }, { "epoch": 1.4388466904355914, "grad_norm": 0.458183616399765, "learning_rate": 6.214859886296491e-06, "loss": 0.3556, "step": 3094 }, { "epoch": 1.4393117346147883, "grad_norm": 0.4203883111476898, "learning_rate": 6.212234859669366e-06, "loss": 0.3503, "step": 3095 }, { "epoch": 1.4397767787939855, "grad_norm": 0.43506595492362976, "learning_rate": 6.209609477998339e-06, "loss": 0.3604, "step": 3096 }, { "epoch": 1.4402418229731824, "grad_norm": 0.37604156136512756, "learning_rate": 6.206983742052345e-06, "loss": 0.317, "step": 3097 }, { "epoch": 1.4407068671523795, "grad_norm": 0.3998353183269501, "learning_rate": 6.204357652600419e-06, "loss": 0.3476, "step": 3098 }, { "epoch": 1.4411719113315766, "grad_norm": 0.4210034906864166, "learning_rate": 6.201731210411698e-06, "loss": 0.368, "step": 3099 }, { "epoch": 1.4416369555107735, "grad_norm": 0.44328659772872925, "learning_rate": 6.199104416255426e-06, "loss": 0.3929, "step": 3100 }, { "epoch": 1.4421019996899704, "grad_norm": 0.44677838683128357, "learning_rate": 6.196477270900947e-06, "loss": 0.3404, "step": 3101 }, { "epoch": 1.4425670438691676, "grad_norm": 0.42385387420654297, "learning_rate": 6.193849775117709e-06, "loss": 0.3779, "step": 3102 }, { "epoch": 1.4430320880483647, "grad_norm": 0.4066379964351654, "learning_rate": 6.191221929675266e-06, "loss": 0.3221, "step": 3103 }, { "epoch": 1.4434971322275616, "grad_norm": 0.40358468890190125, "learning_rate": 6.188593735343269e-06, "loss": 0.332, "step": 3104 }, { "epoch": 1.4439621764067587, "grad_norm": 0.5439780950546265, "learning_rate": 6.185965192891472e-06, "loss": 0.3834, "step": 3105 }, { "epoch": 1.4444272205859556, "grad_norm": 0.4213503897190094, "learning_rate": 6.183336303089735e-06, "loss": 0.3605, "step": 3106 }, { "epoch": 1.4448922647651528, "grad_norm": 0.39889928698539734, "learning_rate": 6.1807070667080145e-06, "loss": 0.3439, "step": 3107 }, { "epoch": 1.4453573089443497, "grad_norm": 0.5045921802520752, "learning_rate": 6.1780774845163736e-06, "loss": 0.3687, "step": 3108 }, { "epoch": 1.4458223531235468, "grad_norm": 0.4484003186225891, "learning_rate": 6.175447557284972e-06, "loss": 0.349, "step": 3109 }, { "epoch": 1.4462873973027437, "grad_norm": 0.4632101058959961, "learning_rate": 6.172817285784076e-06, "loss": 0.3897, "step": 3110 }, { "epoch": 1.4467524414819408, "grad_norm": 0.43179023265838623, "learning_rate": 6.170186670784047e-06, "loss": 0.3275, "step": 3111 }, { "epoch": 1.4472174856611377, "grad_norm": 0.48543015122413635, "learning_rate": 6.1675557130553475e-06, "loss": 0.3395, "step": 3112 }, { "epoch": 1.4476825298403349, "grad_norm": 0.4830179214477539, "learning_rate": 6.164924413368546e-06, "loss": 0.3724, "step": 3113 }, { "epoch": 1.448147574019532, "grad_norm": 0.4735562801361084, "learning_rate": 6.162292772494305e-06, "loss": 0.362, "step": 3114 }, { "epoch": 1.4486126181987289, "grad_norm": 0.43904268741607666, "learning_rate": 6.159660791203392e-06, "loss": 0.3283, "step": 3115 }, { "epoch": 1.4490776623779258, "grad_norm": 0.4425245523452759, "learning_rate": 6.157028470266669e-06, "loss": 0.3637, "step": 3116 }, { "epoch": 1.449542706557123, "grad_norm": 0.40749219059944153, "learning_rate": 6.1543958104551e-06, "loss": 0.3578, "step": 3117 }, { "epoch": 1.45000775073632, "grad_norm": 0.4425008296966553, "learning_rate": 6.15176281253975e-06, "loss": 0.3405, "step": 3118 }, { "epoch": 1.450472794915517, "grad_norm": 0.38328611850738525, "learning_rate": 6.1491294772917785e-06, "loss": 0.3225, "step": 3119 }, { "epoch": 1.450937839094714, "grad_norm": 0.47361764311790466, "learning_rate": 6.146495805482451e-06, "loss": 0.3796, "step": 3120 }, { "epoch": 1.451402883273911, "grad_norm": 0.41950133442878723, "learning_rate": 6.143861797883124e-06, "loss": 0.355, "step": 3121 }, { "epoch": 1.451867927453108, "grad_norm": 0.46674826741218567, "learning_rate": 6.141227455265256e-06, "loss": 0.37, "step": 3122 }, { "epoch": 1.452332971632305, "grad_norm": 0.46177905797958374, "learning_rate": 6.138592778400404e-06, "loss": 0.3742, "step": 3123 }, { "epoch": 1.4527980158115021, "grad_norm": 0.4034256637096405, "learning_rate": 6.135957768060221e-06, "loss": 0.3395, "step": 3124 }, { "epoch": 1.453263059990699, "grad_norm": 0.47683438658714294, "learning_rate": 6.133322425016459e-06, "loss": 0.3755, "step": 3125 }, { "epoch": 1.4537281041698962, "grad_norm": 0.46114102005958557, "learning_rate": 6.1306867500409685e-06, "loss": 0.3385, "step": 3126 }, { "epoch": 1.454193148349093, "grad_norm": 0.40554434061050415, "learning_rate": 6.128050743905695e-06, "loss": 0.3636, "step": 3127 }, { "epoch": 1.4546581925282902, "grad_norm": 0.4275938868522644, "learning_rate": 6.12541440738268e-06, "loss": 0.3578, "step": 3128 }, { "epoch": 1.4551232367074873, "grad_norm": 0.42898625135421753, "learning_rate": 6.122777741244067e-06, "loss": 0.3635, "step": 3129 }, { "epoch": 1.4555882808866842, "grad_norm": 0.4090706706047058, "learning_rate": 6.120140746262091e-06, "loss": 0.329, "step": 3130 }, { "epoch": 1.4560533250658811, "grad_norm": 0.45951342582702637, "learning_rate": 6.117503423209084e-06, "loss": 0.3656, "step": 3131 }, { "epoch": 1.4565183692450783, "grad_norm": 0.3707917332649231, "learning_rate": 6.1148657728574765e-06, "loss": 0.3696, "step": 3132 }, { "epoch": 1.4569834134242754, "grad_norm": 0.4463897943496704, "learning_rate": 6.1122277959797925e-06, "loss": 0.335, "step": 3133 }, { "epoch": 1.4574484576034723, "grad_norm": 0.48531046509742737, "learning_rate": 6.109589493348655e-06, "loss": 0.3419, "step": 3134 }, { "epoch": 1.4579135017826694, "grad_norm": 0.3899511396884918, "learning_rate": 6.106950865736777e-06, "loss": 0.3556, "step": 3135 }, { "epoch": 1.4583785459618663, "grad_norm": 0.3700244426727295, "learning_rate": 6.10431191391697e-06, "loss": 0.3633, "step": 3136 }, { "epoch": 1.4588435901410635, "grad_norm": 0.43946826457977295, "learning_rate": 6.101672638662141e-06, "loss": 0.3294, "step": 3137 }, { "epoch": 1.4593086343202604, "grad_norm": 0.407183438539505, "learning_rate": 6.099033040745292e-06, "loss": 0.3605, "step": 3138 }, { "epoch": 1.4597736784994575, "grad_norm": 0.39797452092170715, "learning_rate": 6.0963931209395165e-06, "loss": 0.3581, "step": 3139 }, { "epoch": 1.4602387226786544, "grad_norm": 0.429902583360672, "learning_rate": 6.0937528800180056e-06, "loss": 0.3423, "step": 3140 }, { "epoch": 1.4607037668578515, "grad_norm": 0.43174222111701965, "learning_rate": 6.0911123187540414e-06, "loss": 0.3604, "step": 3141 }, { "epoch": 1.4611688110370484, "grad_norm": 0.4118611514568329, "learning_rate": 6.088471437921002e-06, "loss": 0.3742, "step": 3142 }, { "epoch": 1.4616338552162456, "grad_norm": 0.37793856859207153, "learning_rate": 6.0858302382923585e-06, "loss": 0.3462, "step": 3143 }, { "epoch": 1.4620988993954427, "grad_norm": 0.4251405894756317, "learning_rate": 6.083188720641676e-06, "loss": 0.3283, "step": 3144 }, { "epoch": 1.4625639435746396, "grad_norm": 0.3901570439338684, "learning_rate": 6.080546885742611e-06, "loss": 0.3649, "step": 3145 }, { "epoch": 1.4630289877538365, "grad_norm": 0.39368346333503723, "learning_rate": 6.077904734368915e-06, "loss": 0.3652, "step": 3146 }, { "epoch": 1.4634940319330336, "grad_norm": 0.4399242103099823, "learning_rate": 6.075262267294432e-06, "loss": 0.3585, "step": 3147 }, { "epoch": 1.4639590761122308, "grad_norm": 0.37305113673210144, "learning_rate": 6.072619485293095e-06, "loss": 0.339, "step": 3148 }, { "epoch": 1.4644241202914277, "grad_norm": 0.4052191376686096, "learning_rate": 6.069976389138934e-06, "loss": 0.3698, "step": 3149 }, { "epoch": 1.4648891644706248, "grad_norm": 0.4240652620792389, "learning_rate": 6.067332979606069e-06, "loss": 0.3417, "step": 3150 }, { "epoch": 1.4653542086498217, "grad_norm": 0.4290774464607239, "learning_rate": 6.064689257468711e-06, "loss": 0.3381, "step": 3151 }, { "epoch": 1.4658192528290188, "grad_norm": 0.3692978024482727, "learning_rate": 6.062045223501163e-06, "loss": 0.3469, "step": 3152 }, { "epoch": 1.4662842970082157, "grad_norm": 0.37645047903060913, "learning_rate": 6.0594008784778206e-06, "loss": 0.3889, "step": 3153 }, { "epoch": 1.4667493411874128, "grad_norm": 0.4383949637413025, "learning_rate": 6.056756223173167e-06, "loss": 0.3384, "step": 3154 }, { "epoch": 1.4672143853666098, "grad_norm": 0.4462020695209503, "learning_rate": 6.054111258361782e-06, "loss": 0.3641, "step": 3155 }, { "epoch": 1.4676794295458069, "grad_norm": 0.38466379046440125, "learning_rate": 6.051465984818332e-06, "loss": 0.3538, "step": 3156 }, { "epoch": 1.4681444737250038, "grad_norm": 0.42687302827835083, "learning_rate": 6.048820403317575e-06, "loss": 0.3286, "step": 3157 }, { "epoch": 1.468609517904201, "grad_norm": 0.4158511161804199, "learning_rate": 6.046174514634355e-06, "loss": 0.3368, "step": 3158 }, { "epoch": 1.469074562083398, "grad_norm": 0.409820020198822, "learning_rate": 6.043528319543615e-06, "loss": 0.3446, "step": 3159 }, { "epoch": 1.469539606262595, "grad_norm": 0.4203595817089081, "learning_rate": 6.04088181882038e-06, "loss": 0.3366, "step": 3160 }, { "epoch": 1.4700046504417918, "grad_norm": 0.4121120274066925, "learning_rate": 6.038235013239767e-06, "loss": 0.3271, "step": 3161 }, { "epoch": 1.470469694620989, "grad_norm": 0.40760576725006104, "learning_rate": 6.035587903576984e-06, "loss": 0.3536, "step": 3162 }, { "epoch": 1.470934738800186, "grad_norm": 0.4346151649951935, "learning_rate": 6.032940490607324e-06, "loss": 0.3356, "step": 3163 }, { "epoch": 1.471399782979383, "grad_norm": 0.459246426820755, "learning_rate": 6.030292775106173e-06, "loss": 0.3679, "step": 3164 }, { "epoch": 1.4718648271585801, "grad_norm": 0.42355459928512573, "learning_rate": 6.027644757849004e-06, "loss": 0.386, "step": 3165 }, { "epoch": 1.472329871337777, "grad_norm": 0.4206094741821289, "learning_rate": 6.024996439611376e-06, "loss": 0.3344, "step": 3166 }, { "epoch": 1.4727949155169742, "grad_norm": 0.36260485649108887, "learning_rate": 6.022347821168941e-06, "loss": 0.3362, "step": 3167 }, { "epoch": 1.473259959696171, "grad_norm": 0.4596819281578064, "learning_rate": 6.0196989032974366e-06, "loss": 0.4029, "step": 3168 }, { "epoch": 1.4737250038753682, "grad_norm": 0.4887857139110565, "learning_rate": 6.017049686772685e-06, "loss": 0.335, "step": 3169 }, { "epoch": 1.474190048054565, "grad_norm": 0.4173000156879425, "learning_rate": 6.0144001723706e-06, "loss": 0.329, "step": 3170 }, { "epoch": 1.4746550922337622, "grad_norm": 0.419015496969223, "learning_rate": 6.011750360867183e-06, "loss": 0.3812, "step": 3171 }, { "epoch": 1.4751201364129591, "grad_norm": 0.5123944282531738, "learning_rate": 6.009100253038518e-06, "loss": 0.3391, "step": 3172 }, { "epoch": 1.4755851805921563, "grad_norm": 0.44715067744255066, "learning_rate": 6.00644984966078e-06, "loss": 0.3153, "step": 3173 }, { "epoch": 1.4760502247713534, "grad_norm": 0.4377215802669525, "learning_rate": 6.003799151510229e-06, "loss": 0.3815, "step": 3174 }, { "epoch": 1.4765152689505503, "grad_norm": 0.5287955403327942, "learning_rate": 6.001148159363213e-06, "loss": 0.362, "step": 3175 }, { "epoch": 1.4769803131297472, "grad_norm": 0.3864191770553589, "learning_rate": 5.998496873996161e-06, "loss": 0.3175, "step": 3176 }, { "epoch": 1.4774453573089443, "grad_norm": 0.45076581835746765, "learning_rate": 5.995845296185594e-06, "loss": 0.3572, "step": 3177 }, { "epoch": 1.4779104014881415, "grad_norm": 0.4349561929702759, "learning_rate": 5.993193426708115e-06, "loss": 0.3452, "step": 3178 }, { "epoch": 1.4783754456673384, "grad_norm": 0.45883357524871826, "learning_rate": 5.990541266340414e-06, "loss": 0.3639, "step": 3179 }, { "epoch": 1.4788404898465355, "grad_norm": 0.4192543923854828, "learning_rate": 5.987888815859266e-06, "loss": 0.3199, "step": 3180 }, { "epoch": 1.4793055340257324, "grad_norm": 0.4741514325141907, "learning_rate": 5.985236076041531e-06, "loss": 0.3624, "step": 3181 }, { "epoch": 1.4797705782049295, "grad_norm": 0.4844418168067932, "learning_rate": 5.982583047664151e-06, "loss": 0.3577, "step": 3182 }, { "epoch": 1.4802356223841264, "grad_norm": 0.4832378923892975, "learning_rate": 5.979929731504158e-06, "loss": 0.3504, "step": 3183 }, { "epoch": 1.4807006665633236, "grad_norm": 0.42179495096206665, "learning_rate": 5.9772761283386626e-06, "loss": 0.3384, "step": 3184 }, { "epoch": 1.4811657107425205, "grad_norm": 0.4342862665653229, "learning_rate": 5.9746222389448635e-06, "loss": 0.3624, "step": 3185 }, { "epoch": 1.4816307549217176, "grad_norm": 0.47698426246643066, "learning_rate": 5.971968064100042e-06, "loss": 0.3483, "step": 3186 }, { "epoch": 1.4820957991009145, "grad_norm": 0.4859686493873596, "learning_rate": 5.969313604581564e-06, "loss": 0.3691, "step": 3187 }, { "epoch": 1.4825608432801116, "grad_norm": 0.4531085789203644, "learning_rate": 5.966658861166874e-06, "loss": 0.3741, "step": 3188 }, { "epoch": 1.4830258874593087, "grad_norm": 0.46530213952064514, "learning_rate": 5.9640038346335045e-06, "loss": 0.3501, "step": 3189 }, { "epoch": 1.4834909316385056, "grad_norm": 0.4475176930427551, "learning_rate": 5.961348525759072e-06, "loss": 0.3515, "step": 3190 }, { "epoch": 1.4839559758177026, "grad_norm": 0.4891159236431122, "learning_rate": 5.958692935321271e-06, "loss": 0.3485, "step": 3191 }, { "epoch": 1.4844210199968997, "grad_norm": 0.4750014841556549, "learning_rate": 5.956037064097881e-06, "loss": 0.3773, "step": 3192 }, { "epoch": 1.4848860641760968, "grad_norm": 0.37893229722976685, "learning_rate": 5.953380912866764e-06, "loss": 0.372, "step": 3193 }, { "epoch": 1.4853511083552937, "grad_norm": 0.41365838050842285, "learning_rate": 5.9507244824058644e-06, "loss": 0.2967, "step": 3194 }, { "epoch": 1.4858161525344908, "grad_norm": 0.5796143412590027, "learning_rate": 5.948067773493205e-06, "loss": 0.4209, "step": 3195 }, { "epoch": 1.4862811967136877, "grad_norm": 0.388439804315567, "learning_rate": 5.945410786906896e-06, "loss": 0.3745, "step": 3196 }, { "epoch": 1.4867462408928849, "grad_norm": 0.3897601068019867, "learning_rate": 5.9427535234251235e-06, "loss": 0.3073, "step": 3197 }, { "epoch": 1.4872112850720818, "grad_norm": 0.5110495090484619, "learning_rate": 5.940095983826157e-06, "loss": 0.3641, "step": 3198 }, { "epoch": 1.487676329251279, "grad_norm": 0.46713900566101074, "learning_rate": 5.9374381688883475e-06, "loss": 0.3414, "step": 3199 }, { "epoch": 1.488141373430476, "grad_norm": 0.4882988929748535, "learning_rate": 5.9347800793901245e-06, "loss": 0.3823, "step": 3200 }, { "epoch": 1.488606417609673, "grad_norm": 0.45495909452438354, "learning_rate": 5.93212171611e-06, "loss": 0.3497, "step": 3201 }, { "epoch": 1.4890714617888698, "grad_norm": 0.38749927282333374, "learning_rate": 5.929463079826565e-06, "loss": 0.3585, "step": 3202 }, { "epoch": 1.489536505968067, "grad_norm": 0.4609941244125366, "learning_rate": 5.9268041713184934e-06, "loss": 0.3501, "step": 3203 }, { "epoch": 1.490001550147264, "grad_norm": 0.38551583886146545, "learning_rate": 5.924144991364533e-06, "loss": 0.3611, "step": 3204 }, { "epoch": 1.490466594326461, "grad_norm": 0.37728172540664673, "learning_rate": 5.921485540743516e-06, "loss": 0.3233, "step": 3205 }, { "epoch": 1.490931638505658, "grad_norm": 0.4780239760875702, "learning_rate": 5.918825820234352e-06, "loss": 0.3523, "step": 3206 }, { "epoch": 1.491396682684855, "grad_norm": 0.43843451142311096, "learning_rate": 5.9161658306160286e-06, "loss": 0.3479, "step": 3207 }, { "epoch": 1.4918617268640522, "grad_norm": 0.43270936608314514, "learning_rate": 5.913505572667615e-06, "loss": 0.3498, "step": 3208 }, { "epoch": 1.492326771043249, "grad_norm": 0.4177435338497162, "learning_rate": 5.910845047168259e-06, "loss": 0.3481, "step": 3209 }, { "epoch": 1.4927918152224462, "grad_norm": 0.4250778257846832, "learning_rate": 5.908184254897183e-06, "loss": 0.338, "step": 3210 }, { "epoch": 1.493256859401643, "grad_norm": 0.40649059414863586, "learning_rate": 5.90552319663369e-06, "loss": 0.3486, "step": 3211 }, { "epoch": 1.4937219035808402, "grad_norm": 0.40856921672821045, "learning_rate": 5.902861873157162e-06, "loss": 0.3573, "step": 3212 }, { "epoch": 1.4941869477600371, "grad_norm": 0.5227980613708496, "learning_rate": 5.900200285247055e-06, "loss": 0.3883, "step": 3213 }, { "epoch": 1.4946519919392343, "grad_norm": 0.3745517134666443, "learning_rate": 5.897538433682909e-06, "loss": 0.3327, "step": 3214 }, { "epoch": 1.4951170361184314, "grad_norm": 0.38121265172958374, "learning_rate": 5.894876319244334e-06, "loss": 0.3519, "step": 3215 }, { "epoch": 1.4955820802976283, "grad_norm": 0.45746102929115295, "learning_rate": 5.892213942711019e-06, "loss": 0.3917, "step": 3216 }, { "epoch": 1.4960471244768252, "grad_norm": 0.47537440061569214, "learning_rate": 5.889551304862735e-06, "loss": 0.3296, "step": 3217 }, { "epoch": 1.4965121686560223, "grad_norm": 0.427947461605072, "learning_rate": 5.8868884064793215e-06, "loss": 0.3772, "step": 3218 }, { "epoch": 1.4969772128352195, "grad_norm": 0.40912094712257385, "learning_rate": 5.884225248340699e-06, "loss": 0.3814, "step": 3219 }, { "epoch": 1.4974422570144164, "grad_norm": 0.40474238991737366, "learning_rate": 5.881561831226865e-06, "loss": 0.346, "step": 3220 }, { "epoch": 1.4979073011936133, "grad_norm": 0.4801974296569824, "learning_rate": 5.878898155917889e-06, "loss": 0.3668, "step": 3221 }, { "epoch": 1.4983723453728104, "grad_norm": 0.4175057113170624, "learning_rate": 5.8762342231939205e-06, "loss": 0.2951, "step": 3222 }, { "epoch": 1.4988373895520075, "grad_norm": 0.41870221495628357, "learning_rate": 5.873570033835181e-06, "loss": 0.3757, "step": 3223 }, { "epoch": 1.4993024337312044, "grad_norm": 0.3709333539009094, "learning_rate": 5.8709055886219665e-06, "loss": 0.3465, "step": 3224 }, { "epoch": 1.4997674779104015, "grad_norm": 0.4033331871032715, "learning_rate": 5.8682408883346535e-06, "loss": 0.3314, "step": 3225 }, { "epoch": 1.5002325220895987, "grad_norm": 0.45277902483940125, "learning_rate": 5.865575933753686e-06, "loss": 0.3526, "step": 3226 }, { "epoch": 1.5006975662687956, "grad_norm": 0.476517915725708, "learning_rate": 5.862910725659586e-06, "loss": 0.3651, "step": 3227 }, { "epoch": 1.5011626104479925, "grad_norm": 0.372768372297287, "learning_rate": 5.860245264832952e-06, "loss": 0.3189, "step": 3228 }, { "epoch": 1.5016276546271896, "grad_norm": 0.4148904085159302, "learning_rate": 5.857579552054454e-06, "loss": 0.3241, "step": 3229 }, { "epoch": 1.5020926988063867, "grad_norm": 0.38212111592292786, "learning_rate": 5.854913588104832e-06, "loss": 0.3015, "step": 3230 }, { "epoch": 1.5025577429855836, "grad_norm": 0.5229579210281372, "learning_rate": 5.85224737376491e-06, "loss": 0.3802, "step": 3231 }, { "epoch": 1.5030227871647805, "grad_norm": 0.37114113569259644, "learning_rate": 5.849580909815573e-06, "loss": 0.3221, "step": 3232 }, { "epoch": 1.5034878313439777, "grad_norm": 0.4346405863761902, "learning_rate": 5.84691419703779e-06, "loss": 0.3551, "step": 3233 }, { "epoch": 1.5039528755231748, "grad_norm": 0.463509202003479, "learning_rate": 5.844247236212593e-06, "loss": 0.3559, "step": 3234 }, { "epoch": 1.5044179197023717, "grad_norm": 0.5213866829872131, "learning_rate": 5.8415800281210945e-06, "loss": 0.3356, "step": 3235 }, { "epoch": 1.5048829638815686, "grad_norm": 0.4189200699329376, "learning_rate": 5.838912573544475e-06, "loss": 0.349, "step": 3236 }, { "epoch": 1.5053480080607657, "grad_norm": 0.41886717081069946, "learning_rate": 5.836244873263989e-06, "loss": 0.3107, "step": 3237 }, { "epoch": 1.5058130522399629, "grad_norm": 0.4343456029891968, "learning_rate": 5.833576928060964e-06, "loss": 0.3686, "step": 3238 }, { "epoch": 1.5062780964191598, "grad_norm": 0.41297784447669983, "learning_rate": 5.830908738716797e-06, "loss": 0.342, "step": 3239 }, { "epoch": 1.5067431405983567, "grad_norm": 0.44648993015289307, "learning_rate": 5.828240306012957e-06, "loss": 0.3716, "step": 3240 }, { "epoch": 1.507208184777554, "grad_norm": 0.40866819024086, "learning_rate": 5.825571630730984e-06, "loss": 0.3111, "step": 3241 }, { "epoch": 1.507673228956751, "grad_norm": 0.45804688334465027, "learning_rate": 5.8229027136524896e-06, "loss": 0.3408, "step": 3242 }, { "epoch": 1.5081382731359478, "grad_norm": 0.4614756107330322, "learning_rate": 5.820233555559157e-06, "loss": 0.3852, "step": 3243 }, { "epoch": 1.508603317315145, "grad_norm": 0.39953604340553284, "learning_rate": 5.81756415723274e-06, "loss": 0.298, "step": 3244 }, { "epoch": 1.509068361494342, "grad_norm": 0.4138416349887848, "learning_rate": 5.814894519455061e-06, "loss": 0.3585, "step": 3245 }, { "epoch": 1.509533405673539, "grad_norm": 0.4480072557926178, "learning_rate": 5.812224643008014e-06, "loss": 0.3583, "step": 3246 }, { "epoch": 1.509998449852736, "grad_norm": 0.3700372576713562, "learning_rate": 5.809554528673562e-06, "loss": 0.2974, "step": 3247 }, { "epoch": 1.510463494031933, "grad_norm": 0.4125301241874695, "learning_rate": 5.806884177233737e-06, "loss": 0.3842, "step": 3248 }, { "epoch": 1.5109285382111302, "grad_norm": 0.4698508679866791, "learning_rate": 5.804213589470644e-06, "loss": 0.3631, "step": 3249 }, { "epoch": 1.511393582390327, "grad_norm": 0.37476885318756104, "learning_rate": 5.801542766166454e-06, "loss": 0.3072, "step": 3250 }, { "epoch": 1.511858626569524, "grad_norm": 0.43928253650665283, "learning_rate": 5.7988717081034066e-06, "loss": 0.3819, "step": 3251 }, { "epoch": 1.512323670748721, "grad_norm": 0.4448581635951996, "learning_rate": 5.796200416063813e-06, "loss": 0.325, "step": 3252 }, { "epoch": 1.5127887149279182, "grad_norm": 0.5041275024414062, "learning_rate": 5.793528890830049e-06, "loss": 0.3903, "step": 3253 }, { "epoch": 1.5132537591071151, "grad_norm": 0.3757099211215973, "learning_rate": 5.790857133184563e-06, "loss": 0.3212, "step": 3254 }, { "epoch": 1.5137188032863123, "grad_norm": 0.5224563479423523, "learning_rate": 5.788185143909868e-06, "loss": 0.3684, "step": 3255 }, { "epoch": 1.5141838474655094, "grad_norm": 0.46224769949913025, "learning_rate": 5.785512923788549e-06, "loss": 0.354, "step": 3256 }, { "epoch": 1.5146488916447063, "grad_norm": 0.4003010392189026, "learning_rate": 5.7828404736032515e-06, "loss": 0.3085, "step": 3257 }, { "epoch": 1.5151139358239032, "grad_norm": 0.4108293950557709, "learning_rate": 5.780167794136696e-06, "loss": 0.3633, "step": 3258 }, { "epoch": 1.5155789800031003, "grad_norm": 0.4604003131389618, "learning_rate": 5.777494886171667e-06, "loss": 0.3237, "step": 3259 }, { "epoch": 1.5160440241822974, "grad_norm": 0.49842220544815063, "learning_rate": 5.7748217504910145e-06, "loss": 0.3905, "step": 3260 }, { "epoch": 1.5165090683614944, "grad_norm": 0.4144563376903534, "learning_rate": 5.772148387877656e-06, "loss": 0.3754, "step": 3261 }, { "epoch": 1.5169741125406913, "grad_norm": 0.4629983901977539, "learning_rate": 5.7694747991145775e-06, "loss": 0.3352, "step": 3262 }, { "epoch": 1.5174391567198884, "grad_norm": 0.3761589825153351, "learning_rate": 5.766800984984828e-06, "loss": 0.3488, "step": 3263 }, { "epoch": 1.5179042008990855, "grad_norm": 0.3842097222805023, "learning_rate": 5.764126946271526e-06, "loss": 0.3479, "step": 3264 }, { "epoch": 1.5183692450782824, "grad_norm": 0.39669203758239746, "learning_rate": 5.7614526837578525e-06, "loss": 0.3421, "step": 3265 }, { "epoch": 1.5188342892574793, "grad_norm": 0.42259714007377625, "learning_rate": 5.758778198227057e-06, "loss": 0.36, "step": 3266 }, { "epoch": 1.5192993334366764, "grad_norm": 0.39835333824157715, "learning_rate": 5.7561034904624525e-06, "loss": 0.3514, "step": 3267 }, { "epoch": 1.5197643776158736, "grad_norm": 0.383436918258667, "learning_rate": 5.753428561247416e-06, "loss": 0.3601, "step": 3268 }, { "epoch": 1.5202294217950705, "grad_norm": 0.39197006821632385, "learning_rate": 5.750753411365394e-06, "loss": 0.3377, "step": 3269 }, { "epoch": 1.5206944659742676, "grad_norm": 0.4436843991279602, "learning_rate": 5.74807804159989e-06, "loss": 0.3428, "step": 3270 }, { "epoch": 1.5211595101534647, "grad_norm": 0.3785765469074249, "learning_rate": 5.74540245273448e-06, "loss": 0.3329, "step": 3271 }, { "epoch": 1.5216245543326616, "grad_norm": 0.3833819329738617, "learning_rate": 5.7427266455528e-06, "loss": 0.3533, "step": 3272 }, { "epoch": 1.5220895985118585, "grad_norm": 0.4789281189441681, "learning_rate": 5.7400506208385486e-06, "loss": 0.3841, "step": 3273 }, { "epoch": 1.5225546426910557, "grad_norm": 0.4086497724056244, "learning_rate": 5.737374379375491e-06, "loss": 0.3316, "step": 3274 }, { "epoch": 1.5230196868702528, "grad_norm": 0.39715808629989624, "learning_rate": 5.734697921947456e-06, "loss": 0.3326, "step": 3275 }, { "epoch": 1.5234847310494497, "grad_norm": 0.45392605662345886, "learning_rate": 5.732021249338333e-06, "loss": 0.3432, "step": 3276 }, { "epoch": 1.5239497752286466, "grad_norm": 0.4217411279678345, "learning_rate": 5.729344362332075e-06, "loss": 0.3766, "step": 3277 }, { "epoch": 1.5244148194078437, "grad_norm": 0.4188506603240967, "learning_rate": 5.7266672617127014e-06, "loss": 0.3234, "step": 3278 }, { "epoch": 1.5248798635870409, "grad_norm": 0.40585535764694214, "learning_rate": 5.723989948264291e-06, "loss": 0.3499, "step": 3279 }, { "epoch": 1.5253449077662378, "grad_norm": 0.4009935259819031, "learning_rate": 5.721312422770984e-06, "loss": 0.359, "step": 3280 }, { "epoch": 1.5258099519454347, "grad_norm": 0.4260950982570648, "learning_rate": 5.718634686016985e-06, "loss": 0.3418, "step": 3281 }, { "epoch": 1.5262749961246318, "grad_norm": 0.40163952112197876, "learning_rate": 5.715956738786559e-06, "loss": 0.3451, "step": 3282 }, { "epoch": 1.526740040303829, "grad_norm": 0.38455137610435486, "learning_rate": 5.713278581864032e-06, "loss": 0.3522, "step": 3283 }, { "epoch": 1.5272050844830258, "grad_norm": 0.3814459443092346, "learning_rate": 5.710600216033797e-06, "loss": 0.3334, "step": 3284 }, { "epoch": 1.527670128662223, "grad_norm": 0.4370444416999817, "learning_rate": 5.7079216420803e-06, "loss": 0.3619, "step": 3285 }, { "epoch": 1.52813517284142, "grad_norm": 0.40159928798675537, "learning_rate": 5.705242860788052e-06, "loss": 0.3379, "step": 3286 }, { "epoch": 1.528600217020617, "grad_norm": 0.4162231385707855, "learning_rate": 5.7025638729416275e-06, "loss": 0.3562, "step": 3287 }, { "epoch": 1.529065261199814, "grad_norm": 0.36917373538017273, "learning_rate": 5.699884679325656e-06, "loss": 0.3496, "step": 3288 }, { "epoch": 1.529530305379011, "grad_norm": 0.36086469888687134, "learning_rate": 5.697205280724828e-06, "loss": 0.33, "step": 3289 }, { "epoch": 1.5299953495582082, "grad_norm": 0.434421181678772, "learning_rate": 5.6945256779239e-06, "loss": 0.373, "step": 3290 }, { "epoch": 1.530460393737405, "grad_norm": 0.39509040117263794, "learning_rate": 5.691845871707682e-06, "loss": 0.3426, "step": 3291 }, { "epoch": 1.530925437916602, "grad_norm": 0.38857322931289673, "learning_rate": 5.689165862861046e-06, "loss": 0.3389, "step": 3292 }, { "epoch": 1.531390482095799, "grad_norm": 0.38726887106895447, "learning_rate": 5.686485652168923e-06, "loss": 0.3262, "step": 3293 }, { "epoch": 1.5318555262749962, "grad_norm": 0.4127531051635742, "learning_rate": 5.683805240416302e-06, "loss": 0.3861, "step": 3294 }, { "epoch": 1.5323205704541931, "grad_norm": 0.3922295868396759, "learning_rate": 5.681124628388235e-06, "loss": 0.3506, "step": 3295 }, { "epoch": 1.53278561463339, "grad_norm": 0.38945940136909485, "learning_rate": 5.678443816869828e-06, "loss": 0.34, "step": 3296 }, { "epoch": 1.5332506588125872, "grad_norm": 0.38314536213874817, "learning_rate": 5.675762806646247e-06, "loss": 0.3136, "step": 3297 }, { "epoch": 1.5337157029917843, "grad_norm": 0.4257749021053314, "learning_rate": 5.673081598502715e-06, "loss": 0.3641, "step": 3298 }, { "epoch": 1.5341807471709812, "grad_norm": 0.4407237470149994, "learning_rate": 5.670400193224516e-06, "loss": 0.3381, "step": 3299 }, { "epoch": 1.5346457913501783, "grad_norm": 0.4940028190612793, "learning_rate": 5.66771859159699e-06, "loss": 0.336, "step": 3300 }, { "epoch": 1.5351108355293754, "grad_norm": 0.40870407223701477, "learning_rate": 5.665036794405535e-06, "loss": 0.3687, "step": 3301 }, { "epoch": 1.5355758797085723, "grad_norm": 0.4065590500831604, "learning_rate": 5.662354802435606e-06, "loss": 0.3937, "step": 3302 }, { "epoch": 1.5360409238877692, "grad_norm": 0.39256197214126587, "learning_rate": 5.659672616472712e-06, "loss": 0.3364, "step": 3303 }, { "epoch": 1.5365059680669664, "grad_norm": 0.4631505608558655, "learning_rate": 5.656990237302426e-06, "loss": 0.3388, "step": 3304 }, { "epoch": 1.5369710122461635, "grad_norm": 0.3832097053527832, "learning_rate": 5.6543076657103705e-06, "loss": 0.3108, "step": 3305 }, { "epoch": 1.5374360564253604, "grad_norm": 0.44715628027915955, "learning_rate": 5.651624902482225e-06, "loss": 0.3716, "step": 3306 }, { "epoch": 1.5379011006045573, "grad_norm": 0.5109707713127136, "learning_rate": 5.648941948403732e-06, "loss": 0.3541, "step": 3307 }, { "epoch": 1.5383661447837544, "grad_norm": 0.35396090149879456, "learning_rate": 5.646258804260685e-06, "loss": 0.3068, "step": 3308 }, { "epoch": 1.5388311889629516, "grad_norm": 0.5051494836807251, "learning_rate": 5.643575470838929e-06, "loss": 0.4256, "step": 3309 }, { "epoch": 1.5392962331421485, "grad_norm": 0.4141186475753784, "learning_rate": 5.640891948924373e-06, "loss": 0.3335, "step": 3310 }, { "epoch": 1.5397612773213454, "grad_norm": 0.3828221559524536, "learning_rate": 5.638208239302975e-06, "loss": 0.3464, "step": 3311 }, { "epoch": 1.5402263215005425, "grad_norm": 0.39551404118537903, "learning_rate": 5.6355243427607475e-06, "loss": 0.387, "step": 3312 }, { "epoch": 1.5406913656797396, "grad_norm": 0.3690919280052185, "learning_rate": 5.632840260083766e-06, "loss": 0.2866, "step": 3313 }, { "epoch": 1.5411564098589365, "grad_norm": 0.458450585603714, "learning_rate": 5.630155992058151e-06, "loss": 0.4064, "step": 3314 }, { "epoch": 1.5416214540381337, "grad_norm": 0.4220149517059326, "learning_rate": 5.6274715394700805e-06, "loss": 0.377, "step": 3315 }, { "epoch": 1.5420864982173308, "grad_norm": 0.41717949509620667, "learning_rate": 5.62478690310579e-06, "loss": 0.3553, "step": 3316 }, { "epoch": 1.5425515423965277, "grad_norm": 0.42877861857414246, "learning_rate": 5.622102083751563e-06, "loss": 0.3538, "step": 3317 }, { "epoch": 1.5430165865757246, "grad_norm": 0.44071346521377563, "learning_rate": 5.61941708219374e-06, "loss": 0.3461, "step": 3318 }, { "epoch": 1.5434816307549217, "grad_norm": 0.47426027059555054, "learning_rate": 5.6167318992187155e-06, "loss": 0.3504, "step": 3319 }, { "epoch": 1.5439466749341189, "grad_norm": 0.4373508393764496, "learning_rate": 5.614046535612936e-06, "loss": 0.3585, "step": 3320 }, { "epoch": 1.5444117191133158, "grad_norm": 0.48637011647224426, "learning_rate": 5.6113609921629e-06, "loss": 0.3465, "step": 3321 }, { "epoch": 1.5448767632925127, "grad_norm": 0.3798205852508545, "learning_rate": 5.60867526965516e-06, "loss": 0.3345, "step": 3322 }, { "epoch": 1.5453418074717098, "grad_norm": 0.3805210590362549, "learning_rate": 5.60598936887632e-06, "loss": 0.3352, "step": 3323 }, { "epoch": 1.545806851650907, "grad_norm": 0.44706982374191284, "learning_rate": 5.603303290613036e-06, "loss": 0.3934, "step": 3324 }, { "epoch": 1.5462718958301038, "grad_norm": 0.38400477170944214, "learning_rate": 5.600617035652019e-06, "loss": 0.3168, "step": 3325 }, { "epoch": 1.5467369400093007, "grad_norm": 0.3989894688129425, "learning_rate": 5.597930604780028e-06, "loss": 0.3661, "step": 3326 }, { "epoch": 1.5472019841884979, "grad_norm": 0.40619131922721863, "learning_rate": 5.595243998783876e-06, "loss": 0.355, "step": 3327 }, { "epoch": 1.547667028367695, "grad_norm": 0.4321429431438446, "learning_rate": 5.592557218450424e-06, "loss": 0.3404, "step": 3328 }, { "epoch": 1.548132072546892, "grad_norm": 0.3868047893047333, "learning_rate": 5.589870264566588e-06, "loss": 0.3609, "step": 3329 }, { "epoch": 1.548597116726089, "grad_norm": 0.4493039548397064, "learning_rate": 5.587183137919332e-06, "loss": 0.3628, "step": 3330 }, { "epoch": 1.5490621609052861, "grad_norm": 0.4076448082923889, "learning_rate": 5.584495839295674e-06, "loss": 0.3429, "step": 3331 }, { "epoch": 1.549527205084483, "grad_norm": 0.4267314374446869, "learning_rate": 5.58180836948268e-06, "loss": 0.3768, "step": 3332 }, { "epoch": 1.54999224926368, "grad_norm": 0.4145706295967102, "learning_rate": 5.579120729267463e-06, "loss": 0.3438, "step": 3333 }, { "epoch": 1.550457293442877, "grad_norm": 0.42286407947540283, "learning_rate": 5.576432919437193e-06, "loss": 0.3253, "step": 3334 }, { "epoch": 1.5509223376220742, "grad_norm": 0.3949490785598755, "learning_rate": 5.57374494077908e-06, "loss": 0.346, "step": 3335 }, { "epoch": 1.5513873818012711, "grad_norm": 0.44627898931503296, "learning_rate": 5.571056794080396e-06, "loss": 0.3721, "step": 3336 }, { "epoch": 1.551852425980468, "grad_norm": 0.3899642527103424, "learning_rate": 5.568368480128453e-06, "loss": 0.3405, "step": 3337 }, { "epoch": 1.5523174701596651, "grad_norm": 0.4098135530948639, "learning_rate": 5.565679999710614e-06, "loss": 0.3319, "step": 3338 }, { "epoch": 1.5527825143388623, "grad_norm": 0.39709505438804626, "learning_rate": 5.562991353614292e-06, "loss": 0.3294, "step": 3339 }, { "epoch": 1.5532475585180592, "grad_norm": 0.3686107099056244, "learning_rate": 5.560302542626947e-06, "loss": 0.359, "step": 3340 }, { "epoch": 1.553712602697256, "grad_norm": 0.3666870892047882, "learning_rate": 5.557613567536087e-06, "loss": 0.3197, "step": 3341 }, { "epoch": 1.5541776468764532, "grad_norm": 0.4867759048938751, "learning_rate": 5.554924429129271e-06, "loss": 0.3515, "step": 3342 }, { "epoch": 1.5546426910556503, "grad_norm": 0.3786954879760742, "learning_rate": 5.552235128194105e-06, "loss": 0.3297, "step": 3343 }, { "epoch": 1.5551077352348472, "grad_norm": 0.3850143849849701, "learning_rate": 5.5495456655182376e-06, "loss": 0.3379, "step": 3344 }, { "epoch": 1.5555727794140444, "grad_norm": 0.4032987952232361, "learning_rate": 5.546856041889374e-06, "loss": 0.3273, "step": 3345 }, { "epoch": 1.5560378235932415, "grad_norm": 0.4159895181655884, "learning_rate": 5.544166258095256e-06, "loss": 0.3851, "step": 3346 }, { "epoch": 1.5565028677724384, "grad_norm": 0.41379231214523315, "learning_rate": 5.54147631492368e-06, "loss": 0.3465, "step": 3347 }, { "epoch": 1.5569679119516353, "grad_norm": 0.36884400248527527, "learning_rate": 5.538786213162487e-06, "loss": 0.3264, "step": 3348 }, { "epoch": 1.5574329561308324, "grad_norm": 0.3928481638431549, "learning_rate": 5.536095953599565e-06, "loss": 0.3351, "step": 3349 }, { "epoch": 1.5578980003100296, "grad_norm": 0.4361253082752228, "learning_rate": 5.533405537022846e-06, "loss": 0.3428, "step": 3350 }, { "epoch": 1.5583630444892265, "grad_norm": 0.36980322003364563, "learning_rate": 5.530714964220308e-06, "loss": 0.3544, "step": 3351 }, { "epoch": 1.5588280886684234, "grad_norm": 0.3828973174095154, "learning_rate": 5.528024235979978e-06, "loss": 0.3581, "step": 3352 }, { "epoch": 1.5592931328476205, "grad_norm": 0.3844209313392639, "learning_rate": 5.525333353089926e-06, "loss": 0.3084, "step": 3353 }, { "epoch": 1.5597581770268176, "grad_norm": 0.4435608386993408, "learning_rate": 5.522642316338268e-06, "loss": 0.3475, "step": 3354 }, { "epoch": 1.5602232212060145, "grad_norm": 0.36281177401542664, "learning_rate": 5.519951126513164e-06, "loss": 0.3257, "step": 3355 }, { "epoch": 1.5606882653852114, "grad_norm": 0.4244144558906555, "learning_rate": 5.517259784402823e-06, "loss": 0.3716, "step": 3356 }, { "epoch": 1.5611533095644086, "grad_norm": 0.39875200390815735, "learning_rate": 5.514568290795492e-06, "loss": 0.3374, "step": 3357 }, { "epoch": 1.5616183537436057, "grad_norm": 0.42064738273620605, "learning_rate": 5.511876646479466e-06, "loss": 0.3643, "step": 3358 }, { "epoch": 1.5620833979228026, "grad_norm": 0.4080541133880615, "learning_rate": 5.509184852243084e-06, "loss": 0.3452, "step": 3359 }, { "epoch": 1.5625484421019997, "grad_norm": 0.3764135539531708, "learning_rate": 5.5064929088747324e-06, "loss": 0.3365, "step": 3360 }, { "epoch": 1.5630134862811969, "grad_norm": 0.39057546854019165, "learning_rate": 5.503800817162833e-06, "loss": 0.3422, "step": 3361 }, { "epoch": 1.5634785304603938, "grad_norm": 0.3925474286079407, "learning_rate": 5.501108577895858e-06, "loss": 0.3133, "step": 3362 }, { "epoch": 1.5639435746395907, "grad_norm": 0.4536259174346924, "learning_rate": 5.49841619186232e-06, "loss": 0.3756, "step": 3363 }, { "epoch": 1.5644086188187878, "grad_norm": 0.4168597161769867, "learning_rate": 5.495723659850776e-06, "loss": 0.3313, "step": 3364 }, { "epoch": 1.564873662997985, "grad_norm": 0.35290852189064026, "learning_rate": 5.493030982649823e-06, "loss": 0.3392, "step": 3365 }, { "epoch": 1.5653387071771818, "grad_norm": 0.36755627393722534, "learning_rate": 5.4903381610481034e-06, "loss": 0.3271, "step": 3366 }, { "epoch": 1.5658037513563787, "grad_norm": 0.42110899090766907, "learning_rate": 5.487645195834302e-06, "loss": 0.3726, "step": 3367 }, { "epoch": 1.5662687955355759, "grad_norm": 0.3488036096096039, "learning_rate": 5.484952087797144e-06, "loss": 0.3097, "step": 3368 }, { "epoch": 1.566733839714773, "grad_norm": 0.4092177152633667, "learning_rate": 5.482258837725397e-06, "loss": 0.3727, "step": 3369 }, { "epoch": 1.5671988838939699, "grad_norm": 0.3953334093093872, "learning_rate": 5.479565446407867e-06, "loss": 0.3146, "step": 3370 }, { "epoch": 1.5676639280731668, "grad_norm": 0.3620423376560211, "learning_rate": 5.47687191463341e-06, "loss": 0.3622, "step": 3371 }, { "epoch": 1.5681289722523641, "grad_norm": 0.3988654613494873, "learning_rate": 5.4741782431909144e-06, "loss": 0.345, "step": 3372 }, { "epoch": 1.568594016431561, "grad_norm": 0.3772416114807129, "learning_rate": 5.471484432869314e-06, "loss": 0.3463, "step": 3373 }, { "epoch": 1.569059060610758, "grad_norm": 0.4692382216453552, "learning_rate": 5.4687904844575814e-06, "loss": 0.3523, "step": 3374 }, { "epoch": 1.569524104789955, "grad_norm": 0.38080427050590515, "learning_rate": 5.46609639874473e-06, "loss": 0.3665, "step": 3375 }, { "epoch": 1.5699891489691522, "grad_norm": 0.3956538438796997, "learning_rate": 5.4634021765198135e-06, "loss": 0.3536, "step": 3376 }, { "epoch": 1.570454193148349, "grad_norm": 0.46239396929740906, "learning_rate": 5.460707818571928e-06, "loss": 0.3549, "step": 3377 }, { "epoch": 1.570919237327546, "grad_norm": 0.42876169085502625, "learning_rate": 5.458013325690205e-06, "loss": 0.3599, "step": 3378 }, { "epoch": 1.5713842815067431, "grad_norm": 0.3865092992782593, "learning_rate": 5.455318698663819e-06, "loss": 0.3271, "step": 3379 }, { "epoch": 1.5718493256859403, "grad_norm": 0.40359625220298767, "learning_rate": 5.452623938281983e-06, "loss": 0.3451, "step": 3380 }, { "epoch": 1.5723143698651372, "grad_norm": 0.4457720220088959, "learning_rate": 5.449929045333946e-06, "loss": 0.3897, "step": 3381 }, { "epoch": 1.572779414044334, "grad_norm": 0.4244859516620636, "learning_rate": 5.447234020608999e-06, "loss": 0.3683, "step": 3382 }, { "epoch": 1.5732444582235312, "grad_norm": 0.37125489115715027, "learning_rate": 5.444538864896472e-06, "loss": 0.3132, "step": 3383 }, { "epoch": 1.5737095024027283, "grad_norm": 0.3960449993610382, "learning_rate": 5.441843578985735e-06, "loss": 0.3133, "step": 3384 }, { "epoch": 1.5741745465819252, "grad_norm": 0.4028528034687042, "learning_rate": 5.439148163666188e-06, "loss": 0.367, "step": 3385 }, { "epoch": 1.5746395907611221, "grad_norm": 0.38724517822265625, "learning_rate": 5.436452619727278e-06, "loss": 0.3709, "step": 3386 }, { "epoch": 1.5751046349403195, "grad_norm": 0.41816893219947815, "learning_rate": 5.4337569479584866e-06, "loss": 0.3545, "step": 3387 }, { "epoch": 1.5755696791195164, "grad_norm": 0.3713048994541168, "learning_rate": 5.431061149149327e-06, "loss": 0.3023, "step": 3388 }, { "epoch": 1.5760347232987133, "grad_norm": 0.37138456106185913, "learning_rate": 5.428365224089362e-06, "loss": 0.3382, "step": 3389 }, { "epoch": 1.5764997674779104, "grad_norm": 0.39625900983810425, "learning_rate": 5.425669173568179e-06, "loss": 0.3549, "step": 3390 }, { "epoch": 1.5769648116571076, "grad_norm": 0.4074704349040985, "learning_rate": 5.42297299837541e-06, "loss": 0.3267, "step": 3391 }, { "epoch": 1.5774298558363045, "grad_norm": 0.41984236240386963, "learning_rate": 5.42027669930072e-06, "loss": 0.3526, "step": 3392 }, { "epoch": 1.5778949000155014, "grad_norm": 0.38742825388908386, "learning_rate": 5.417580277133812e-06, "loss": 0.3161, "step": 3393 }, { "epoch": 1.5783599441946985, "grad_norm": 0.41518786549568176, "learning_rate": 5.414883732664422e-06, "loss": 0.3263, "step": 3394 }, { "epoch": 1.5788249883738956, "grad_norm": 0.41794711351394653, "learning_rate": 5.412187066682327e-06, "loss": 0.3718, "step": 3395 }, { "epoch": 1.5792900325530925, "grad_norm": 0.42802178859710693, "learning_rate": 5.409490279977335e-06, "loss": 0.3883, "step": 3396 }, { "epoch": 1.5797550767322894, "grad_norm": 0.3825167715549469, "learning_rate": 5.406793373339292e-06, "loss": 0.3426, "step": 3397 }, { "epoch": 1.5802201209114866, "grad_norm": 0.3995634913444519, "learning_rate": 5.404096347558078e-06, "loss": 0.3749, "step": 3398 }, { "epoch": 1.5806851650906837, "grad_norm": 0.35901325941085815, "learning_rate": 5.4013992034236065e-06, "loss": 0.3438, "step": 3399 }, { "epoch": 1.5811502092698806, "grad_norm": 0.3686612546443939, "learning_rate": 5.398701941725827e-06, "loss": 0.3198, "step": 3400 }, { "epoch": 1.5816152534490775, "grad_norm": 0.3947772979736328, "learning_rate": 5.396004563254728e-06, "loss": 0.3274, "step": 3401 }, { "epoch": 1.5820802976282748, "grad_norm": 0.45323094725608826, "learning_rate": 5.393307068800322e-06, "loss": 0.4098, "step": 3402 }, { "epoch": 1.5825453418074718, "grad_norm": 0.346132755279541, "learning_rate": 5.390609459152666e-06, "loss": 0.3316, "step": 3403 }, { "epoch": 1.5830103859866687, "grad_norm": 0.39992961287498474, "learning_rate": 5.387911735101845e-06, "loss": 0.3548, "step": 3404 }, { "epoch": 1.5834754301658658, "grad_norm": 0.40256255865097046, "learning_rate": 5.385213897437975e-06, "loss": 0.3207, "step": 3405 }, { "epoch": 1.583940474345063, "grad_norm": 0.428801029920578, "learning_rate": 5.3825159469512135e-06, "loss": 0.3389, "step": 3406 }, { "epoch": 1.5844055185242598, "grad_norm": 0.40780290961265564, "learning_rate": 5.3798178844317435e-06, "loss": 0.3221, "step": 3407 }, { "epoch": 1.5848705627034567, "grad_norm": 0.38632360100746155, "learning_rate": 5.377119710669785e-06, "loss": 0.3096, "step": 3408 }, { "epoch": 1.5853356068826538, "grad_norm": 0.45682859420776367, "learning_rate": 5.374421426455589e-06, "loss": 0.3737, "step": 3409 }, { "epoch": 1.585800651061851, "grad_norm": 0.3837336599826813, "learning_rate": 5.371723032579439e-06, "loss": 0.3283, "step": 3410 }, { "epoch": 1.5862656952410479, "grad_norm": 0.4327690303325653, "learning_rate": 5.369024529831649e-06, "loss": 0.3497, "step": 3411 }, { "epoch": 1.5867307394202448, "grad_norm": 0.4153764843940735, "learning_rate": 5.366325919002569e-06, "loss": 0.3354, "step": 3412 }, { "epoch": 1.587195783599442, "grad_norm": 0.399143785238266, "learning_rate": 5.36362720088258e-06, "loss": 0.3495, "step": 3413 }, { "epoch": 1.587660827778639, "grad_norm": 0.40303295850753784, "learning_rate": 5.3609283762620875e-06, "loss": 0.3219, "step": 3414 }, { "epoch": 1.588125871957836, "grad_norm": 0.40428194403648376, "learning_rate": 5.358229445931538e-06, "loss": 0.3199, "step": 3415 }, { "epoch": 1.588590916137033, "grad_norm": 0.41889360547065735, "learning_rate": 5.355530410681402e-06, "loss": 0.3695, "step": 3416 }, { "epoch": 1.5890559603162302, "grad_norm": 0.43096888065338135, "learning_rate": 5.352831271302183e-06, "loss": 0.3836, "step": 3417 }, { "epoch": 1.589521004495427, "grad_norm": 0.40473613142967224, "learning_rate": 5.350132028584416e-06, "loss": 0.3579, "step": 3418 }, { "epoch": 1.589986048674624, "grad_norm": 0.39574551582336426, "learning_rate": 5.3474326833186656e-06, "loss": 0.3357, "step": 3419 }, { "epoch": 1.5904510928538211, "grad_norm": 0.38140177726745605, "learning_rate": 5.344733236295525e-06, "loss": 0.3498, "step": 3420 }, { "epoch": 1.5909161370330183, "grad_norm": 0.4418228566646576, "learning_rate": 5.34203368830562e-06, "loss": 0.3608, "step": 3421 }, { "epoch": 1.5913811812122152, "grad_norm": 0.34866416454315186, "learning_rate": 5.339334040139603e-06, "loss": 0.3159, "step": 3422 }, { "epoch": 1.591846225391412, "grad_norm": 0.4378582239151001, "learning_rate": 5.336634292588156e-06, "loss": 0.4073, "step": 3423 }, { "epoch": 1.5923112695706092, "grad_norm": 0.3770564794540405, "learning_rate": 5.333934446441994e-06, "loss": 0.3165, "step": 3424 }, { "epoch": 1.5927763137498063, "grad_norm": 0.4754953682422638, "learning_rate": 5.3312345024918575e-06, "loss": 0.4054, "step": 3425 }, { "epoch": 1.5932413579290032, "grad_norm": 0.3792072534561157, "learning_rate": 5.328534461528515e-06, "loss": 0.2908, "step": 3426 }, { "epoch": 1.5937064021082001, "grad_norm": 0.4275760054588318, "learning_rate": 5.325834324342765e-06, "loss": 0.3807, "step": 3427 }, { "epoch": 1.5941714462873973, "grad_norm": 0.4156563878059387, "learning_rate": 5.323134091725434e-06, "loss": 0.3607, "step": 3428 }, { "epoch": 1.5946364904665944, "grad_norm": 0.3854392170906067, "learning_rate": 5.320433764467375e-06, "loss": 0.3535, "step": 3429 }, { "epoch": 1.5951015346457913, "grad_norm": 0.40652233362197876, "learning_rate": 5.3177333433594734e-06, "loss": 0.338, "step": 3430 }, { "epoch": 1.5955665788249884, "grad_norm": 0.4638773202896118, "learning_rate": 5.315032829192636e-06, "loss": 0.3495, "step": 3431 }, { "epoch": 1.5960316230041856, "grad_norm": 0.40089505910873413, "learning_rate": 5.312332222757799e-06, "loss": 0.309, "step": 3432 }, { "epoch": 1.5964966671833825, "grad_norm": 0.41283005475997925, "learning_rate": 5.309631524845929e-06, "loss": 0.3703, "step": 3433 }, { "epoch": 1.5969617113625794, "grad_norm": 0.4164558947086334, "learning_rate": 5.306930736248013e-06, "loss": 0.3475, "step": 3434 }, { "epoch": 1.5974267555417765, "grad_norm": 0.38702747225761414, "learning_rate": 5.30422985775507e-06, "loss": 0.3319, "step": 3435 }, { "epoch": 1.5978917997209736, "grad_norm": 0.3754745423793793, "learning_rate": 5.301528890158143e-06, "loss": 0.3057, "step": 3436 }, { "epoch": 1.5983568439001705, "grad_norm": 0.41013574600219727, "learning_rate": 5.298827834248303e-06, "loss": 0.3675, "step": 3437 }, { "epoch": 1.5988218880793674, "grad_norm": 0.43470141291618347, "learning_rate": 5.296126690816644e-06, "loss": 0.3695, "step": 3438 }, { "epoch": 1.5992869322585646, "grad_norm": 0.42870068550109863, "learning_rate": 5.293425460654288e-06, "loss": 0.3576, "step": 3439 }, { "epoch": 1.5997519764377617, "grad_norm": 0.3700660765171051, "learning_rate": 5.290724144552379e-06, "loss": 0.3345, "step": 3440 }, { "epoch": 1.6002170206169586, "grad_norm": 0.37624475359916687, "learning_rate": 5.288022743302093e-06, "loss": 0.3434, "step": 3441 }, { "epoch": 1.6006820647961555, "grad_norm": 0.4745047092437744, "learning_rate": 5.2853212576946225e-06, "loss": 0.367, "step": 3442 }, { "epoch": 1.6011471089753526, "grad_norm": 0.3658214807510376, "learning_rate": 5.282619688521189e-06, "loss": 0.3596, "step": 3443 }, { "epoch": 1.6016121531545497, "grad_norm": 0.44426223635673523, "learning_rate": 5.2799180365730405e-06, "loss": 0.3377, "step": 3444 }, { "epoch": 1.6020771973337466, "grad_norm": 0.3807421326637268, "learning_rate": 5.2772163026414455e-06, "loss": 0.3378, "step": 3445 }, { "epoch": 1.6025422415129438, "grad_norm": 0.4405421316623688, "learning_rate": 5.274514487517698e-06, "loss": 0.3689, "step": 3446 }, { "epoch": 1.603007285692141, "grad_norm": 0.36217546463012695, "learning_rate": 5.271812591993116e-06, "loss": 0.3242, "step": 3447 }, { "epoch": 1.6034723298713378, "grad_norm": 0.4049311876296997, "learning_rate": 5.269110616859041e-06, "loss": 0.3661, "step": 3448 }, { "epoch": 1.6039373740505347, "grad_norm": 0.4050389230251312, "learning_rate": 5.266408562906838e-06, "loss": 0.3447, "step": 3449 }, { "epoch": 1.6044024182297318, "grad_norm": 0.40756580233573914, "learning_rate": 5.263706430927895e-06, "loss": 0.3566, "step": 3450 }, { "epoch": 1.604867462408929, "grad_norm": 0.3964058458805084, "learning_rate": 5.261004221713621e-06, "loss": 0.3193, "step": 3451 }, { "epoch": 1.6053325065881259, "grad_norm": 0.439263254404068, "learning_rate": 5.258301936055449e-06, "loss": 0.359, "step": 3452 }, { "epoch": 1.6057975507673228, "grad_norm": 0.4526593089103699, "learning_rate": 5.255599574744836e-06, "loss": 0.3515, "step": 3453 }, { "epoch": 1.60626259494652, "grad_norm": 0.43033599853515625, "learning_rate": 5.252897138573261e-06, "loss": 0.3492, "step": 3454 }, { "epoch": 1.606727639125717, "grad_norm": 0.43363288044929504, "learning_rate": 5.2501946283322204e-06, "loss": 0.3523, "step": 3455 }, { "epoch": 1.607192683304914, "grad_norm": 0.3905112147331238, "learning_rate": 5.247492044813237e-06, "loss": 0.3388, "step": 3456 }, { "epoch": 1.6076577274841108, "grad_norm": 0.45133721828460693, "learning_rate": 5.244789388807855e-06, "loss": 0.3748, "step": 3457 }, { "epoch": 1.608122771663308, "grad_norm": 0.5364686250686646, "learning_rate": 5.242086661107635e-06, "loss": 0.3814, "step": 3458 }, { "epoch": 1.608587815842505, "grad_norm": 0.4719644784927368, "learning_rate": 5.239383862504166e-06, "loss": 0.3346, "step": 3459 }, { "epoch": 1.609052860021702, "grad_norm": 0.4059002697467804, "learning_rate": 5.236680993789052e-06, "loss": 0.3241, "step": 3460 }, { "epoch": 1.6095179042008991, "grad_norm": 0.4945448040962219, "learning_rate": 5.2339780557539185e-06, "loss": 0.3588, "step": 3461 }, { "epoch": 1.6099829483800963, "grad_norm": 0.49811092019081116, "learning_rate": 5.231275049190414e-06, "loss": 0.3167, "step": 3462 }, { "epoch": 1.6104479925592932, "grad_norm": 0.47977715730667114, "learning_rate": 5.228571974890204e-06, "loss": 0.3564, "step": 3463 }, { "epoch": 1.61091303673849, "grad_norm": 0.39883333444595337, "learning_rate": 5.225868833644973e-06, "loss": 0.3644, "step": 3464 }, { "epoch": 1.6113780809176872, "grad_norm": 0.38860994577407837, "learning_rate": 5.223165626246432e-06, "loss": 0.347, "step": 3465 }, { "epoch": 1.6118431250968843, "grad_norm": 0.4335678815841675, "learning_rate": 5.220462353486304e-06, "loss": 0.3534, "step": 3466 }, { "epoch": 1.6123081692760812, "grad_norm": 0.4631887376308441, "learning_rate": 5.217759016156333e-06, "loss": 0.3494, "step": 3467 }, { "epoch": 1.6127732134552781, "grad_norm": 0.4094836711883545, "learning_rate": 5.215055615048283e-06, "loss": 0.3841, "step": 3468 }, { "epoch": 1.6132382576344753, "grad_norm": 0.36185774207115173, "learning_rate": 5.2123521509539375e-06, "loss": 0.3246, "step": 3469 }, { "epoch": 1.6137033018136724, "grad_norm": 0.46863463521003723, "learning_rate": 5.209648624665095e-06, "loss": 0.3801, "step": 3470 }, { "epoch": 1.6141683459928693, "grad_norm": 0.4251573085784912, "learning_rate": 5.206945036973577e-06, "loss": 0.3449, "step": 3471 }, { "epoch": 1.6146333901720662, "grad_norm": 0.4120125472545624, "learning_rate": 5.2042413886712176e-06, "loss": 0.3312, "step": 3472 }, { "epoch": 1.6150984343512633, "grad_norm": 0.41511979699134827, "learning_rate": 5.201537680549874e-06, "loss": 0.3495, "step": 3473 }, { "epoch": 1.6155634785304605, "grad_norm": 0.43771082162857056, "learning_rate": 5.198833913401418e-06, "loss": 0.3843, "step": 3474 }, { "epoch": 1.6160285227096574, "grad_norm": 0.39838284254074097, "learning_rate": 5.196130088017737e-06, "loss": 0.308, "step": 3475 }, { "epoch": 1.6164935668888545, "grad_norm": 0.4643276631832123, "learning_rate": 5.19342620519074e-06, "loss": 0.351, "step": 3476 }, { "epoch": 1.6169586110680516, "grad_norm": 0.34968698024749756, "learning_rate": 5.190722265712349e-06, "loss": 0.3099, "step": 3477 }, { "epoch": 1.6174236552472485, "grad_norm": 0.40132972598075867, "learning_rate": 5.188018270374504e-06, "loss": 0.3358, "step": 3478 }, { "epoch": 1.6178886994264454, "grad_norm": 0.3743666410446167, "learning_rate": 5.185314219969163e-06, "loss": 0.3669, "step": 3479 }, { "epoch": 1.6183537436056425, "grad_norm": 0.3797807991504669, "learning_rate": 5.182610115288296e-06, "loss": 0.3471, "step": 3480 }, { "epoch": 1.6188187877848397, "grad_norm": 0.43480437994003296, "learning_rate": 5.179905957123891e-06, "loss": 0.3913, "step": 3481 }, { "epoch": 1.6192838319640366, "grad_norm": 0.3873792886734009, "learning_rate": 5.177201746267955e-06, "loss": 0.354, "step": 3482 }, { "epoch": 1.6197488761432335, "grad_norm": 0.38425004482269287, "learning_rate": 5.174497483512506e-06, "loss": 0.3294, "step": 3483 }, { "epoch": 1.6202139203224306, "grad_norm": 0.4359165132045746, "learning_rate": 5.171793169649578e-06, "loss": 0.3585, "step": 3484 }, { "epoch": 1.6206789645016277, "grad_norm": 0.3433784246444702, "learning_rate": 5.1690888054712215e-06, "loss": 0.3329, "step": 3485 }, { "epoch": 1.6211440086808246, "grad_norm": 0.4633955657482147, "learning_rate": 5.1663843917695e-06, "loss": 0.3745, "step": 3486 }, { "epoch": 1.6216090528600215, "grad_norm": 0.44534632563591003, "learning_rate": 5.163679929336491e-06, "loss": 0.3692, "step": 3487 }, { "epoch": 1.6220740970392187, "grad_norm": 0.3649267852306366, "learning_rate": 5.160975418964293e-06, "loss": 0.362, "step": 3488 }, { "epoch": 1.6225391412184158, "grad_norm": 0.3883490264415741, "learning_rate": 5.158270861445007e-06, "loss": 0.3561, "step": 3489 }, { "epoch": 1.6230041853976127, "grad_norm": 0.3937935531139374, "learning_rate": 5.155566257570758e-06, "loss": 0.32, "step": 3490 }, { "epoch": 1.6234692295768098, "grad_norm": 0.36953264474868774, "learning_rate": 5.152861608133678e-06, "loss": 0.362, "step": 3491 }, { "epoch": 1.623934273756007, "grad_norm": 0.49353107810020447, "learning_rate": 5.150156913925916e-06, "loss": 0.3646, "step": 3492 }, { "epoch": 1.6243993179352039, "grad_norm": 0.393684446811676, "learning_rate": 5.147452175739633e-06, "loss": 0.3571, "step": 3493 }, { "epoch": 1.6248643621144008, "grad_norm": 0.369238942861557, "learning_rate": 5.144747394367002e-06, "loss": 0.3446, "step": 3494 }, { "epoch": 1.625329406293598, "grad_norm": 0.3968108296394348, "learning_rate": 5.142042570600212e-06, "loss": 0.3458, "step": 3495 }, { "epoch": 1.625794450472795, "grad_norm": 0.44049766659736633, "learning_rate": 5.139337705231459e-06, "loss": 0.3774, "step": 3496 }, { "epoch": 1.626259494651992, "grad_norm": 0.37539348006248474, "learning_rate": 5.136632799052957e-06, "loss": 0.3373, "step": 3497 }, { "epoch": 1.6267245388311888, "grad_norm": 0.4646461009979248, "learning_rate": 5.133927852856927e-06, "loss": 0.352, "step": 3498 }, { "epoch": 1.627189583010386, "grad_norm": 0.4138670861721039, "learning_rate": 5.131222867435602e-06, "loss": 0.3393, "step": 3499 }, { "epoch": 1.627654627189583, "grad_norm": 0.40055084228515625, "learning_rate": 5.128517843581233e-06, "loss": 0.3605, "step": 3500 }, { "epoch": 1.62811967136878, "grad_norm": 0.4990515112876892, "learning_rate": 5.125812782086075e-06, "loss": 0.338, "step": 3501 }, { "epoch": 1.628584715547977, "grad_norm": 0.49589017033576965, "learning_rate": 5.123107683742397e-06, "loss": 0.3181, "step": 3502 }, { "epoch": 1.629049759727174, "grad_norm": 0.4138791263103485, "learning_rate": 5.1204025493424766e-06, "loss": 0.3751, "step": 3503 }, { "epoch": 1.6295148039063712, "grad_norm": 0.38624855875968933, "learning_rate": 5.117697379678606e-06, "loss": 0.3404, "step": 3504 }, { "epoch": 1.629979848085568, "grad_norm": 0.5305654406547546, "learning_rate": 5.114992175543084e-06, "loss": 0.3786, "step": 3505 }, { "epoch": 1.6304448922647652, "grad_norm": 0.4678351581096649, "learning_rate": 5.112286937728223e-06, "loss": 0.3552, "step": 3506 }, { "epoch": 1.6309099364439623, "grad_norm": 0.4201079308986664, "learning_rate": 5.109581667026341e-06, "loss": 0.388, "step": 3507 }, { "epoch": 1.6313749806231592, "grad_norm": 0.39116141200065613, "learning_rate": 5.106876364229768e-06, "loss": 0.338, "step": 3508 }, { "epoch": 1.6318400248023561, "grad_norm": 0.3895026743412018, "learning_rate": 5.104171030130846e-06, "loss": 0.3374, "step": 3509 }, { "epoch": 1.6323050689815533, "grad_norm": 0.3727634847164154, "learning_rate": 5.101465665521919e-06, "loss": 0.372, "step": 3510 }, { "epoch": 1.6327701131607504, "grad_norm": 0.3340446352958679, "learning_rate": 5.098760271195348e-06, "loss": 0.3047, "step": 3511 }, { "epoch": 1.6332351573399473, "grad_norm": 0.42071983218193054, "learning_rate": 5.096054847943498e-06, "loss": 0.3951, "step": 3512 }, { "epoch": 1.6337002015191442, "grad_norm": 0.3533824682235718, "learning_rate": 5.093349396558744e-06, "loss": 0.3344, "step": 3513 }, { "epoch": 1.6341652456983413, "grad_norm": 0.44178301095962524, "learning_rate": 5.090643917833465e-06, "loss": 0.3444, "step": 3514 }, { "epoch": 1.6346302898775384, "grad_norm": 0.42436400055885315, "learning_rate": 5.0879384125600565e-06, "loss": 0.3877, "step": 3515 }, { "epoch": 1.6350953340567354, "grad_norm": 0.4048612415790558, "learning_rate": 5.085232881530916e-06, "loss": 0.3495, "step": 3516 }, { "epoch": 1.6355603782359323, "grad_norm": 0.3634321689605713, "learning_rate": 5.082527325538449e-06, "loss": 0.3845, "step": 3517 }, { "epoch": 1.6360254224151294, "grad_norm": 0.3633408546447754, "learning_rate": 5.0798217453750675e-06, "loss": 0.3352, "step": 3518 }, { "epoch": 1.6364904665943265, "grad_norm": 0.4096944034099579, "learning_rate": 5.077116141833195e-06, "loss": 0.3372, "step": 3519 }, { "epoch": 1.6369555107735234, "grad_norm": 0.37343043088912964, "learning_rate": 5.074410515705256e-06, "loss": 0.345, "step": 3520 }, { "epoch": 1.6374205549527205, "grad_norm": 0.4189586639404297, "learning_rate": 5.071704867783684e-06, "loss": 0.3634, "step": 3521 }, { "epoch": 1.6378855991319177, "grad_norm": 0.358604371547699, "learning_rate": 5.068999198860924e-06, "loss": 0.3367, "step": 3522 }, { "epoch": 1.6383506433111146, "grad_norm": 0.3955291509628296, "learning_rate": 5.066293509729418e-06, "loss": 0.3635, "step": 3523 }, { "epoch": 1.6388156874903115, "grad_norm": 0.3491485118865967, "learning_rate": 5.063587801181621e-06, "loss": 0.3198, "step": 3524 }, { "epoch": 1.6392807316695086, "grad_norm": 0.39579418301582336, "learning_rate": 5.060882074009988e-06, "loss": 0.3553, "step": 3525 }, { "epoch": 1.6397457758487057, "grad_norm": 0.404255747795105, "learning_rate": 5.0581763290069865e-06, "loss": 0.3354, "step": 3526 }, { "epoch": 1.6402108200279026, "grad_norm": 0.38574931025505066, "learning_rate": 5.055470566965082e-06, "loss": 0.3344, "step": 3527 }, { "epoch": 1.6406758642070995, "grad_norm": 0.3492719829082489, "learning_rate": 5.052764788676749e-06, "loss": 0.3031, "step": 3528 }, { "epoch": 1.6411409083862967, "grad_norm": 0.4036017954349518, "learning_rate": 5.050058994934467e-06, "loss": 0.3784, "step": 3529 }, { "epoch": 1.6416059525654938, "grad_norm": 0.4062303900718689, "learning_rate": 5.047353186530718e-06, "loss": 0.38, "step": 3530 }, { "epoch": 1.6420709967446907, "grad_norm": 0.3702806532382965, "learning_rate": 5.04464736425799e-06, "loss": 0.3046, "step": 3531 }, { "epoch": 1.6425360409238876, "grad_norm": 0.40208354592323303, "learning_rate": 5.0419415289087755e-06, "loss": 0.3609, "step": 3532 }, { "epoch": 1.643001085103085, "grad_norm": 0.35900288820266724, "learning_rate": 5.039235681275568e-06, "loss": 0.305, "step": 3533 }, { "epoch": 1.6434661292822819, "grad_norm": 0.4107772707939148, "learning_rate": 5.036529822150865e-06, "loss": 0.367, "step": 3534 }, { "epoch": 1.6439311734614788, "grad_norm": 0.36534854769706726, "learning_rate": 5.033823952327173e-06, "loss": 0.3382, "step": 3535 }, { "epoch": 1.644396217640676, "grad_norm": 0.3700648844242096, "learning_rate": 5.031118072596993e-06, "loss": 0.3338, "step": 3536 }, { "epoch": 1.644861261819873, "grad_norm": 0.3885965943336487, "learning_rate": 5.028412183752835e-06, "loss": 0.3292, "step": 3537 }, { "epoch": 1.64532630599907, "grad_norm": 0.49532386660575867, "learning_rate": 5.025706286587211e-06, "loss": 0.3665, "step": 3538 }, { "epoch": 1.6457913501782668, "grad_norm": 0.42976298928260803, "learning_rate": 5.023000381892633e-06, "loss": 0.3094, "step": 3539 }, { "epoch": 1.646256394357464, "grad_norm": 0.4339199662208557, "learning_rate": 5.020294470461615e-06, "loss": 0.3615, "step": 3540 }, { "epoch": 1.646721438536661, "grad_norm": 0.4408279061317444, "learning_rate": 5.017588553086677e-06, "loss": 0.3422, "step": 3541 }, { "epoch": 1.647186482715858, "grad_norm": 0.4677359163761139, "learning_rate": 5.014882630560339e-06, "loss": 0.3658, "step": 3542 }, { "epoch": 1.647651526895055, "grad_norm": 0.416978657245636, "learning_rate": 5.01217670367512e-06, "loss": 0.3575, "step": 3543 }, { "epoch": 1.648116571074252, "grad_norm": 0.42392852902412415, "learning_rate": 5.009470773223541e-06, "loss": 0.3341, "step": 3544 }, { "epoch": 1.6485816152534492, "grad_norm": 0.48418161273002625, "learning_rate": 5.006764839998128e-06, "loss": 0.3392, "step": 3545 }, { "epoch": 1.649046659432646, "grad_norm": 0.4479508697986603, "learning_rate": 5.004058904791402e-06, "loss": 0.3764, "step": 3546 }, { "epoch": 1.649511703611843, "grad_norm": 0.43179401755332947, "learning_rate": 5.0013529683958885e-06, "loss": 0.3507, "step": 3547 }, { "epoch": 1.6499767477910403, "grad_norm": 0.3624599874019623, "learning_rate": 4.998647031604114e-06, "loss": 0.3289, "step": 3548 }, { "epoch": 1.6504417919702372, "grad_norm": 0.46299293637275696, "learning_rate": 4.995941095208599e-06, "loss": 0.3391, "step": 3549 }, { "epoch": 1.6509068361494341, "grad_norm": 0.41883838176727295, "learning_rate": 4.993235160001874e-06, "loss": 0.3454, "step": 3550 }, { "epoch": 1.6513718803286312, "grad_norm": 0.40758463740348816, "learning_rate": 4.99052922677646e-06, "loss": 0.3604, "step": 3551 }, { "epoch": 1.6518369245078284, "grad_norm": 0.4133349359035492, "learning_rate": 4.987823296324882e-06, "loss": 0.3388, "step": 3552 }, { "epoch": 1.6523019686870253, "grad_norm": 0.4033692479133606, "learning_rate": 4.985117369439661e-06, "loss": 0.3454, "step": 3553 }, { "epoch": 1.6527670128662222, "grad_norm": 0.38803398609161377, "learning_rate": 4.982411446913324e-06, "loss": 0.3475, "step": 3554 }, { "epoch": 1.6532320570454193, "grad_norm": 0.4113628566265106, "learning_rate": 4.979705529538385e-06, "loss": 0.337, "step": 3555 }, { "epoch": 1.6536971012246164, "grad_norm": 0.4451198875904083, "learning_rate": 4.976999618107369e-06, "loss": 0.3973, "step": 3556 }, { "epoch": 1.6541621454038133, "grad_norm": 0.36026668548583984, "learning_rate": 4.974293713412791e-06, "loss": 0.3212, "step": 3557 }, { "epoch": 1.6546271895830102, "grad_norm": 0.38637468218803406, "learning_rate": 4.971587816247166e-06, "loss": 0.3623, "step": 3558 }, { "epoch": 1.6550922337622074, "grad_norm": 0.3913023769855499, "learning_rate": 4.9688819274030074e-06, "loss": 0.3304, "step": 3559 }, { "epoch": 1.6555572779414045, "grad_norm": 0.3982096016407013, "learning_rate": 4.96617604767283e-06, "loss": 0.3727, "step": 3560 }, { "epoch": 1.6560223221206014, "grad_norm": 0.3954141139984131, "learning_rate": 4.963470177849135e-06, "loss": 0.328, "step": 3561 }, { "epoch": 1.6564873662997985, "grad_norm": 0.40723100304603577, "learning_rate": 4.960764318724434e-06, "loss": 0.344, "step": 3562 }, { "epoch": 1.6569524104789957, "grad_norm": 0.35724228620529175, "learning_rate": 4.958058471091225e-06, "loss": 0.3379, "step": 3563 }, { "epoch": 1.6574174546581926, "grad_norm": 0.3636278808116913, "learning_rate": 4.9553526357420104e-06, "loss": 0.3522, "step": 3564 }, { "epoch": 1.6578824988373895, "grad_norm": 0.5159657597541809, "learning_rate": 4.952646813469282e-06, "loss": 0.3819, "step": 3565 }, { "epoch": 1.6583475430165866, "grad_norm": 0.40651264786720276, "learning_rate": 4.949941005065534e-06, "loss": 0.3147, "step": 3566 }, { "epoch": 1.6588125871957837, "grad_norm": 0.4187847673892975, "learning_rate": 4.947235211323253e-06, "loss": 0.3342, "step": 3567 }, { "epoch": 1.6592776313749806, "grad_norm": 0.43306782841682434, "learning_rate": 4.944529433034919e-06, "loss": 0.3691, "step": 3568 }, { "epoch": 1.6597426755541775, "grad_norm": 0.4201146960258484, "learning_rate": 4.941823670993016e-06, "loss": 0.3509, "step": 3569 }, { "epoch": 1.6602077197333747, "grad_norm": 0.4412671625614166, "learning_rate": 4.939117925990013e-06, "loss": 0.3418, "step": 3570 }, { "epoch": 1.6606727639125718, "grad_norm": 0.3967730700969696, "learning_rate": 4.936412198818382e-06, "loss": 0.3747, "step": 3571 }, { "epoch": 1.6611378080917687, "grad_norm": 0.3809349834918976, "learning_rate": 4.933706490270583e-06, "loss": 0.3397, "step": 3572 }, { "epoch": 1.6616028522709656, "grad_norm": 0.3735617697238922, "learning_rate": 4.9310008011390774e-06, "loss": 0.3398, "step": 3573 }, { "epoch": 1.6620678964501627, "grad_norm": 0.44322773814201355, "learning_rate": 4.9282951322163166e-06, "loss": 0.3258, "step": 3574 }, { "epoch": 1.6625329406293599, "grad_norm": 0.438362181186676, "learning_rate": 4.925589484294747e-06, "loss": 0.3926, "step": 3575 }, { "epoch": 1.6629979848085568, "grad_norm": 0.36551767587661743, "learning_rate": 4.922883858166807e-06, "loss": 0.3317, "step": 3576 }, { "epoch": 1.6634630289877539, "grad_norm": 0.39981698989868164, "learning_rate": 4.920178254624935e-06, "loss": 0.3266, "step": 3577 }, { "epoch": 1.663928073166951, "grad_norm": 0.4227448105812073, "learning_rate": 4.917472674461553e-06, "loss": 0.3333, "step": 3578 }, { "epoch": 1.664393117346148, "grad_norm": 0.4146755337715149, "learning_rate": 4.9147671184690855e-06, "loss": 0.3662, "step": 3579 }, { "epoch": 1.6648581615253448, "grad_norm": 0.4139578342437744, "learning_rate": 4.912061587439944e-06, "loss": 0.3087, "step": 3580 }, { "epoch": 1.665323205704542, "grad_norm": 0.4043808579444885, "learning_rate": 4.9093560821665365e-06, "loss": 0.3842, "step": 3581 }, { "epoch": 1.665788249883739, "grad_norm": 0.3665468990802765, "learning_rate": 4.906650603441259e-06, "loss": 0.3199, "step": 3582 }, { "epoch": 1.666253294062936, "grad_norm": 0.46738430857658386, "learning_rate": 4.903945152056505e-06, "loss": 0.374, "step": 3583 }, { "epoch": 1.666718338242133, "grad_norm": 0.4571022689342499, "learning_rate": 4.901239728804653e-06, "loss": 0.3221, "step": 3584 }, { "epoch": 1.66718338242133, "grad_norm": 0.41317233443260193, "learning_rate": 4.8985343344780815e-06, "loss": 0.3583, "step": 3585 }, { "epoch": 1.6676484266005271, "grad_norm": 0.36751988530158997, "learning_rate": 4.895828969869157e-06, "loss": 0.3409, "step": 3586 }, { "epoch": 1.668113470779724, "grad_norm": 0.44614648818969727, "learning_rate": 4.8931236357702326e-06, "loss": 0.3393, "step": 3587 }, { "epoch": 1.668578514958921, "grad_norm": 0.3438377380371094, "learning_rate": 4.89041833297366e-06, "loss": 0.3349, "step": 3588 }, { "epoch": 1.669043559138118, "grad_norm": 0.393669992685318, "learning_rate": 4.88771306227178e-06, "loss": 0.3534, "step": 3589 }, { "epoch": 1.6695086033173152, "grad_norm": 0.40612417459487915, "learning_rate": 4.885007824456917e-06, "loss": 0.3435, "step": 3590 }, { "epoch": 1.6699736474965121, "grad_norm": 0.3694112300872803, "learning_rate": 4.882302620321395e-06, "loss": 0.3275, "step": 3591 }, { "epoch": 1.6704386916757092, "grad_norm": 0.3994767665863037, "learning_rate": 4.879597450657525e-06, "loss": 0.3593, "step": 3592 }, { "epoch": 1.6709037358549064, "grad_norm": 0.3501642346382141, "learning_rate": 4.876892316257605e-06, "loss": 0.3165, "step": 3593 }, { "epoch": 1.6713687800341033, "grad_norm": 0.3949240744113922, "learning_rate": 4.874187217913926e-06, "loss": 0.3407, "step": 3594 }, { "epoch": 1.6718338242133002, "grad_norm": 0.4457712471485138, "learning_rate": 4.871482156418769e-06, "loss": 0.3615, "step": 3595 }, { "epoch": 1.6722988683924973, "grad_norm": 0.36873969435691833, "learning_rate": 4.868777132564398e-06, "loss": 0.3671, "step": 3596 }, { "epoch": 1.6727639125716944, "grad_norm": 0.35475531220436096, "learning_rate": 4.866072147143075e-06, "loss": 0.3593, "step": 3597 }, { "epoch": 1.6732289567508913, "grad_norm": 0.3828467130661011, "learning_rate": 4.863367200947044e-06, "loss": 0.344, "step": 3598 }, { "epoch": 1.6736940009300882, "grad_norm": 0.44382408261299133, "learning_rate": 4.8606622947685415e-06, "loss": 0.3121, "step": 3599 }, { "epoch": 1.6741590451092854, "grad_norm": 0.4533607065677643, "learning_rate": 4.857957429399788e-06, "loss": 0.3396, "step": 3600 }, { "epoch": 1.6746240892884825, "grad_norm": 0.4441719651222229, "learning_rate": 4.855252605632999e-06, "loss": 0.3897, "step": 3601 }, { "epoch": 1.6750891334676794, "grad_norm": 0.36047327518463135, "learning_rate": 4.852547824260369e-06, "loss": 0.3307, "step": 3602 }, { "epoch": 1.6755541776468763, "grad_norm": 0.39020687341690063, "learning_rate": 4.849843086074085e-06, "loss": 0.3056, "step": 3603 }, { "epoch": 1.6760192218260734, "grad_norm": 0.5388873219490051, "learning_rate": 4.847138391866325e-06, "loss": 0.3605, "step": 3604 }, { "epoch": 1.6764842660052706, "grad_norm": 0.4391537010669708, "learning_rate": 4.8444337424292445e-06, "loss": 0.3831, "step": 3605 }, { "epoch": 1.6769493101844675, "grad_norm": 0.3435923457145691, "learning_rate": 4.841729138554996e-06, "loss": 0.3155, "step": 3606 }, { "epoch": 1.6774143543636646, "grad_norm": 0.3751065135002136, "learning_rate": 4.839024581035709e-06, "loss": 0.3387, "step": 3607 }, { "epoch": 1.6778793985428617, "grad_norm": 0.47324469685554504, "learning_rate": 4.83632007066351e-06, "loss": 0.3561, "step": 3608 }, { "epoch": 1.6783444427220586, "grad_norm": 0.4418434202671051, "learning_rate": 4.833615608230501e-06, "loss": 0.3445, "step": 3609 }, { "epoch": 1.6788094869012555, "grad_norm": 0.4009036719799042, "learning_rate": 4.830911194528781e-06, "loss": 0.3336, "step": 3610 }, { "epoch": 1.6792745310804527, "grad_norm": 0.42753738164901733, "learning_rate": 4.828206830350423e-06, "loss": 0.3613, "step": 3611 }, { "epoch": 1.6797395752596498, "grad_norm": 0.3864332139492035, "learning_rate": 4.825502516487497e-06, "loss": 0.3251, "step": 3612 }, { "epoch": 1.6802046194388467, "grad_norm": 0.38312116265296936, "learning_rate": 4.822798253732046e-06, "loss": 0.3146, "step": 3613 }, { "epoch": 1.6806696636180436, "grad_norm": 0.4273233413696289, "learning_rate": 4.82009404287611e-06, "loss": 0.34, "step": 3614 }, { "epoch": 1.6811347077972407, "grad_norm": 0.4202836751937866, "learning_rate": 4.817389884711706e-06, "loss": 0.3703, "step": 3615 }, { "epoch": 1.6815997519764379, "grad_norm": 0.4047132730484009, "learning_rate": 4.81468578003084e-06, "loss": 0.3389, "step": 3616 }, { "epoch": 1.6820647961556348, "grad_norm": 0.4253717362880707, "learning_rate": 4.8119817296254965e-06, "loss": 0.3647, "step": 3617 }, { "epoch": 1.6825298403348317, "grad_norm": 0.4105294644832611, "learning_rate": 4.809277734287654e-06, "loss": 0.3298, "step": 3618 }, { "epoch": 1.6829948845140288, "grad_norm": 0.367784321308136, "learning_rate": 4.8065737948092615e-06, "loss": 0.3189, "step": 3619 }, { "epoch": 1.683459928693226, "grad_norm": 0.42748555541038513, "learning_rate": 4.803869911982264e-06, "loss": 0.3723, "step": 3620 }, { "epoch": 1.6839249728724228, "grad_norm": 0.44452059268951416, "learning_rate": 4.801166086598584e-06, "loss": 0.324, "step": 3621 }, { "epoch": 1.68439001705162, "grad_norm": 0.4104439914226532, "learning_rate": 4.798462319450127e-06, "loss": 0.316, "step": 3622 }, { "epoch": 1.684855061230817, "grad_norm": 0.37423205375671387, "learning_rate": 4.795758611328782e-06, "loss": 0.3399, "step": 3623 }, { "epoch": 1.685320105410014, "grad_norm": 0.39451467990875244, "learning_rate": 4.793054963026425e-06, "loss": 0.3372, "step": 3624 }, { "epoch": 1.6857851495892109, "grad_norm": 0.42838355898857117, "learning_rate": 4.790351375334906e-06, "loss": 0.358, "step": 3625 }, { "epoch": 1.686250193768408, "grad_norm": 0.4108628034591675, "learning_rate": 4.787647849046064e-06, "loss": 0.31, "step": 3626 }, { "epoch": 1.6867152379476051, "grad_norm": 0.481794536113739, "learning_rate": 4.784944384951718e-06, "loss": 0.3697, "step": 3627 }, { "epoch": 1.687180282126802, "grad_norm": 0.4808673858642578, "learning_rate": 4.782240983843668e-06, "loss": 0.3575, "step": 3628 }, { "epoch": 1.687645326305999, "grad_norm": 0.40240907669067383, "learning_rate": 4.779537646513697e-06, "loss": 0.3278, "step": 3629 }, { "epoch": 1.688110370485196, "grad_norm": 0.3663456439971924, "learning_rate": 4.7768343737535694e-06, "loss": 0.299, "step": 3630 }, { "epoch": 1.6885754146643932, "grad_norm": 0.470187246799469, "learning_rate": 4.774131166355027e-06, "loss": 0.4006, "step": 3631 }, { "epoch": 1.68904045884359, "grad_norm": 0.4300827383995056, "learning_rate": 4.771428025109798e-06, "loss": 0.3551, "step": 3632 }, { "epoch": 1.689505503022787, "grad_norm": 0.45297130942344666, "learning_rate": 4.768724950809587e-06, "loss": 0.3158, "step": 3633 }, { "epoch": 1.6899705472019841, "grad_norm": 0.39991265535354614, "learning_rate": 4.766021944246082e-06, "loss": 0.3333, "step": 3634 }, { "epoch": 1.6904355913811813, "grad_norm": 0.42256665229797363, "learning_rate": 4.763319006210949e-06, "loss": 0.3593, "step": 3635 }, { "epoch": 1.6909006355603782, "grad_norm": 0.4344233274459839, "learning_rate": 4.7606161374958355e-06, "loss": 0.402, "step": 3636 }, { "epoch": 1.6913656797395753, "grad_norm": 0.39570462703704834, "learning_rate": 4.757913338892365e-06, "loss": 0.3526, "step": 3637 }, { "epoch": 1.6918307239187724, "grad_norm": 0.38186073303222656, "learning_rate": 4.755210611192146e-06, "loss": 0.328, "step": 3638 }, { "epoch": 1.6922957680979693, "grad_norm": 0.3946187198162079, "learning_rate": 4.752507955186765e-06, "loss": 0.3226, "step": 3639 }, { "epoch": 1.6927608122771662, "grad_norm": 0.4827534258365631, "learning_rate": 4.749805371667781e-06, "loss": 0.3992, "step": 3640 }, { "epoch": 1.6932258564563634, "grad_norm": 0.3777780830860138, "learning_rate": 4.747102861426742e-06, "loss": 0.3196, "step": 3641 }, { "epoch": 1.6936909006355605, "grad_norm": 0.4654094874858856, "learning_rate": 4.744400425255165e-06, "loss": 0.3779, "step": 3642 }, { "epoch": 1.6941559448147574, "grad_norm": 0.3912859559059143, "learning_rate": 4.741698063944553e-06, "loss": 0.3197, "step": 3643 }, { "epoch": 1.6946209889939543, "grad_norm": 0.40966254472732544, "learning_rate": 4.7389957782863806e-06, "loss": 0.3348, "step": 3644 }, { "epoch": 1.6950860331731514, "grad_norm": 0.41277340054512024, "learning_rate": 4.736293569072108e-06, "loss": 0.3528, "step": 3645 }, { "epoch": 1.6955510773523486, "grad_norm": 0.42663225531578064, "learning_rate": 4.733591437093163e-06, "loss": 0.3359, "step": 3646 }, { "epoch": 1.6960161215315455, "grad_norm": 0.36344727873802185, "learning_rate": 4.730889383140961e-06, "loss": 0.35, "step": 3647 }, { "epoch": 1.6964811657107424, "grad_norm": 0.41764384508132935, "learning_rate": 4.7281874080068855e-06, "loss": 0.375, "step": 3648 }, { "epoch": 1.6969462098899395, "grad_norm": 0.4017370641231537, "learning_rate": 4.725485512482304e-06, "loss": 0.315, "step": 3649 }, { "epoch": 1.6974112540691366, "grad_norm": 0.4225184917449951, "learning_rate": 4.722783697358555e-06, "loss": 0.3786, "step": 3650 }, { "epoch": 1.6978762982483335, "grad_norm": 0.39845162630081177, "learning_rate": 4.720081963426962e-06, "loss": 0.3394, "step": 3651 }, { "epoch": 1.6983413424275307, "grad_norm": 0.3981609642505646, "learning_rate": 4.717380311478813e-06, "loss": 0.3866, "step": 3652 }, { "epoch": 1.6988063866067278, "grad_norm": 0.3535573184490204, "learning_rate": 4.714678742305381e-06, "loss": 0.3103, "step": 3653 }, { "epoch": 1.6992714307859247, "grad_norm": 0.43783578276634216, "learning_rate": 4.711977256697909e-06, "loss": 0.3763, "step": 3654 }, { "epoch": 1.6997364749651216, "grad_norm": 0.39808762073516846, "learning_rate": 4.7092758554476215e-06, "loss": 0.3485, "step": 3655 }, { "epoch": 1.7002015191443187, "grad_norm": 0.3975062668323517, "learning_rate": 4.706574539345712e-06, "loss": 0.3592, "step": 3656 }, { "epoch": 1.7006665633235158, "grad_norm": 0.3854736089706421, "learning_rate": 4.703873309183357e-06, "loss": 0.3236, "step": 3657 }, { "epoch": 1.7011316075027128, "grad_norm": 0.3952430486679077, "learning_rate": 4.7011721657516966e-06, "loss": 0.363, "step": 3658 }, { "epoch": 1.7015966516819097, "grad_norm": 0.47998958826065063, "learning_rate": 4.698471109841858e-06, "loss": 0.4107, "step": 3659 }, { "epoch": 1.7020616958611068, "grad_norm": 0.37458357214927673, "learning_rate": 4.695770142244931e-06, "loss": 0.3453, "step": 3660 }, { "epoch": 1.702526740040304, "grad_norm": 0.3739306330680847, "learning_rate": 4.693069263751989e-06, "loss": 0.3587, "step": 3661 }, { "epoch": 1.7029917842195008, "grad_norm": 0.38612034916877747, "learning_rate": 4.690368475154072e-06, "loss": 0.3531, "step": 3662 }, { "epoch": 1.7034568283986977, "grad_norm": 0.41020193696022034, "learning_rate": 4.687667777242203e-06, "loss": 0.3569, "step": 3663 }, { "epoch": 1.7039218725778948, "grad_norm": 0.3760784864425659, "learning_rate": 4.684967170807365e-06, "loss": 0.3417, "step": 3664 }, { "epoch": 1.704386916757092, "grad_norm": 0.3778047263622284, "learning_rate": 4.682266656640528e-06, "loss": 0.3226, "step": 3665 }, { "epoch": 1.7048519609362889, "grad_norm": 0.4011727273464203, "learning_rate": 4.679566235532625e-06, "loss": 0.3394, "step": 3666 }, { "epoch": 1.705317005115486, "grad_norm": 0.38582319021224976, "learning_rate": 4.676865908274567e-06, "loss": 0.3972, "step": 3667 }, { "epoch": 1.7057820492946831, "grad_norm": 0.3527704179286957, "learning_rate": 4.674165675657236e-06, "loss": 0.3193, "step": 3668 }, { "epoch": 1.70624709347388, "grad_norm": 0.4227658808231354, "learning_rate": 4.671465538471487e-06, "loss": 0.3571, "step": 3669 }, { "epoch": 1.706712137653077, "grad_norm": 0.36377352476119995, "learning_rate": 4.668765497508143e-06, "loss": 0.3433, "step": 3670 }, { "epoch": 1.707177181832274, "grad_norm": 0.3703608512878418, "learning_rate": 4.666065553558007e-06, "loss": 0.3152, "step": 3671 }, { "epoch": 1.7076422260114712, "grad_norm": 0.39327993988990784, "learning_rate": 4.663365707411845e-06, "loss": 0.3686, "step": 3672 }, { "epoch": 1.708107270190668, "grad_norm": 0.41823717951774597, "learning_rate": 4.660665959860399e-06, "loss": 0.3777, "step": 3673 }, { "epoch": 1.708572314369865, "grad_norm": 0.3737585246562958, "learning_rate": 4.657966311694383e-06, "loss": 0.3365, "step": 3674 }, { "epoch": 1.7090373585490621, "grad_norm": 0.43027764558792114, "learning_rate": 4.655266763704476e-06, "loss": 0.3414, "step": 3675 }, { "epoch": 1.7095024027282593, "grad_norm": 0.376740962266922, "learning_rate": 4.652567316681337e-06, "loss": 0.3267, "step": 3676 }, { "epoch": 1.7099674469074562, "grad_norm": 0.40848153829574585, "learning_rate": 4.649867971415585e-06, "loss": 0.361, "step": 3677 }, { "epoch": 1.710432491086653, "grad_norm": 0.40334048867225647, "learning_rate": 4.647168728697819e-06, "loss": 0.3619, "step": 3678 }, { "epoch": 1.7108975352658504, "grad_norm": 0.38001689314842224, "learning_rate": 4.6444695893185994e-06, "loss": 0.3197, "step": 3679 }, { "epoch": 1.7113625794450473, "grad_norm": 0.4063551425933838, "learning_rate": 4.641770554068465e-06, "loss": 0.3519, "step": 3680 }, { "epoch": 1.7118276236242442, "grad_norm": 0.46857863664627075, "learning_rate": 4.639071623737913e-06, "loss": 0.3636, "step": 3681 }, { "epoch": 1.7122926678034414, "grad_norm": 0.4151492416858673, "learning_rate": 4.636372799117424e-06, "loss": 0.3372, "step": 3682 }, { "epoch": 1.7127577119826385, "grad_norm": 0.43509939312934875, "learning_rate": 4.6336740809974315e-06, "loss": 0.3731, "step": 3683 }, { "epoch": 1.7132227561618354, "grad_norm": 0.4010016918182373, "learning_rate": 4.630975470168352e-06, "loss": 0.3233, "step": 3684 }, { "epoch": 1.7136878003410323, "grad_norm": 0.39253363013267517, "learning_rate": 4.628276967420563e-06, "loss": 0.3614, "step": 3685 }, { "epoch": 1.7141528445202294, "grad_norm": 0.36530962586402893, "learning_rate": 4.625578573544414e-06, "loss": 0.3289, "step": 3686 }, { "epoch": 1.7146178886994266, "grad_norm": 0.4295608103275299, "learning_rate": 4.622880289330217e-06, "loss": 0.3404, "step": 3687 }, { "epoch": 1.7150829328786235, "grad_norm": 0.39288264513015747, "learning_rate": 4.620182115568259e-06, "loss": 0.3217, "step": 3688 }, { "epoch": 1.7155479770578204, "grad_norm": 0.43459996581077576, "learning_rate": 4.617484053048788e-06, "loss": 0.3493, "step": 3689 }, { "epoch": 1.7160130212370175, "grad_norm": 0.39573171734809875, "learning_rate": 4.614786102562026e-06, "loss": 0.3547, "step": 3690 }, { "epoch": 1.7164780654162146, "grad_norm": 0.39884433150291443, "learning_rate": 4.6120882648981565e-06, "loss": 0.35, "step": 3691 }, { "epoch": 1.7169431095954115, "grad_norm": 0.4326457679271698, "learning_rate": 4.609390540847336e-06, "loss": 0.3687, "step": 3692 }, { "epoch": 1.7174081537746084, "grad_norm": 0.3984185755252838, "learning_rate": 4.606692931199678e-06, "loss": 0.3789, "step": 3693 }, { "epoch": 1.7178731979538058, "grad_norm": 0.3940320312976837, "learning_rate": 4.603995436745274e-06, "loss": 0.3263, "step": 3694 }, { "epoch": 1.7183382421330027, "grad_norm": 0.4455493688583374, "learning_rate": 4.6012980582741725e-06, "loss": 0.3492, "step": 3695 }, { "epoch": 1.7188032863121996, "grad_norm": 0.4288008511066437, "learning_rate": 4.598600796576395e-06, "loss": 0.3087, "step": 3696 }, { "epoch": 1.7192683304913967, "grad_norm": 0.3839088976383209, "learning_rate": 4.595903652441923e-06, "loss": 0.3869, "step": 3697 }, { "epoch": 1.7197333746705938, "grad_norm": 0.3795816898345947, "learning_rate": 4.59320662666071e-06, "loss": 0.3492, "step": 3698 }, { "epoch": 1.7201984188497907, "grad_norm": 0.39537104964256287, "learning_rate": 4.590509720022665e-06, "loss": 0.3488, "step": 3699 }, { "epoch": 1.7206634630289876, "grad_norm": 0.3996933102607727, "learning_rate": 4.587812933317674e-06, "loss": 0.3653, "step": 3700 }, { "epoch": 1.7211285072081848, "grad_norm": 0.3842652142047882, "learning_rate": 4.5851162673355785e-06, "loss": 0.3643, "step": 3701 }, { "epoch": 1.721593551387382, "grad_norm": 0.3390163779258728, "learning_rate": 4.58241972286619e-06, "loss": 0.3105, "step": 3702 }, { "epoch": 1.7220585955665788, "grad_norm": 0.3907347619533539, "learning_rate": 4.5797233006992805e-06, "loss": 0.3597, "step": 3703 }, { "epoch": 1.7225236397457757, "grad_norm": 0.38712698221206665, "learning_rate": 4.5770270016245915e-06, "loss": 0.3285, "step": 3704 }, { "epoch": 1.7229886839249728, "grad_norm": 0.40449246764183044, "learning_rate": 4.574330826431822e-06, "loss": 0.3639, "step": 3705 }, { "epoch": 1.72345372810417, "grad_norm": 0.39589884877204895, "learning_rate": 4.571634775910641e-06, "loss": 0.3156, "step": 3706 }, { "epoch": 1.7239187722833669, "grad_norm": 0.4132598042488098, "learning_rate": 4.568938850850673e-06, "loss": 0.3397, "step": 3707 }, { "epoch": 1.7243838164625638, "grad_norm": 0.3807767927646637, "learning_rate": 4.566243052041516e-06, "loss": 0.3297, "step": 3708 }, { "epoch": 1.7248488606417611, "grad_norm": 0.3866475820541382, "learning_rate": 4.5635473802727225e-06, "loss": 0.3258, "step": 3709 }, { "epoch": 1.725313904820958, "grad_norm": 0.38793426752090454, "learning_rate": 4.560851836333813e-06, "loss": 0.3257, "step": 3710 }, { "epoch": 1.725778949000155, "grad_norm": 0.440531849861145, "learning_rate": 4.558156421014268e-06, "loss": 0.3547, "step": 3711 }, { "epoch": 1.726243993179352, "grad_norm": 0.4038770794868469, "learning_rate": 4.555461135103529e-06, "loss": 0.35, "step": 3712 }, { "epoch": 1.7267090373585492, "grad_norm": 0.32992175221443176, "learning_rate": 4.5527659793910025e-06, "loss": 0.3144, "step": 3713 }, { "epoch": 1.727174081537746, "grad_norm": 0.4566981792449951, "learning_rate": 4.550070954666056e-06, "loss": 0.4192, "step": 3714 }, { "epoch": 1.727639125716943, "grad_norm": 0.419821172952652, "learning_rate": 4.547376061718021e-06, "loss": 0.3432, "step": 3715 }, { "epoch": 1.7281041698961401, "grad_norm": 0.4241842031478882, "learning_rate": 4.544681301336182e-06, "loss": 0.3418, "step": 3716 }, { "epoch": 1.7285692140753373, "grad_norm": 0.3986935019493103, "learning_rate": 4.541986674309798e-06, "loss": 0.3934, "step": 3717 }, { "epoch": 1.7290342582545342, "grad_norm": 0.360019713640213, "learning_rate": 4.539292181428074e-06, "loss": 0.3393, "step": 3718 }, { "epoch": 1.729499302433731, "grad_norm": 0.39975255727767944, "learning_rate": 4.536597823480188e-06, "loss": 0.3076, "step": 3719 }, { "epoch": 1.7299643466129282, "grad_norm": 0.42592859268188477, "learning_rate": 4.533903601255272e-06, "loss": 0.3379, "step": 3720 }, { "epoch": 1.7304293907921253, "grad_norm": 0.3946399986743927, "learning_rate": 4.531209515542422e-06, "loss": 0.377, "step": 3721 }, { "epoch": 1.7308944349713222, "grad_norm": 0.39706122875213623, "learning_rate": 4.528515567130688e-06, "loss": 0.3682, "step": 3722 }, { "epoch": 1.7313594791505194, "grad_norm": 0.3509725332260132, "learning_rate": 4.525821756809088e-06, "loss": 0.28, "step": 3723 }, { "epoch": 1.7318245233297165, "grad_norm": 0.4142434895038605, "learning_rate": 4.523128085366592e-06, "loss": 0.3259, "step": 3724 }, { "epoch": 1.7322895675089134, "grad_norm": 0.39951425790786743, "learning_rate": 4.520434553592134e-06, "loss": 0.3618, "step": 3725 }, { "epoch": 1.7327546116881103, "grad_norm": 0.3927379250526428, "learning_rate": 4.517741162274605e-06, "loss": 0.3323, "step": 3726 }, { "epoch": 1.7332196558673074, "grad_norm": 0.3651019036769867, "learning_rate": 4.515047912202858e-06, "loss": 0.3219, "step": 3727 }, { "epoch": 1.7336847000465045, "grad_norm": 0.40421250462532043, "learning_rate": 4.5123548041656984e-06, "loss": 0.4112, "step": 3728 }, { "epoch": 1.7341497442257015, "grad_norm": 0.39693304896354675, "learning_rate": 4.509661838951897e-06, "loss": 0.3038, "step": 3729 }, { "epoch": 1.7346147884048984, "grad_norm": 0.3815270960330963, "learning_rate": 4.506969017350178e-06, "loss": 0.3384, "step": 3730 }, { "epoch": 1.7350798325840955, "grad_norm": 0.3915429711341858, "learning_rate": 4.5042763401492256e-06, "loss": 0.3403, "step": 3731 }, { "epoch": 1.7355448767632926, "grad_norm": 0.40549951791763306, "learning_rate": 4.50158380813768e-06, "loss": 0.3575, "step": 3732 }, { "epoch": 1.7360099209424895, "grad_norm": 0.40941810607910156, "learning_rate": 4.498891422104143e-06, "loss": 0.3286, "step": 3733 }, { "epoch": 1.7364749651216864, "grad_norm": 0.38618361949920654, "learning_rate": 4.496199182837167e-06, "loss": 0.3338, "step": 3734 }, { "epoch": 1.7369400093008835, "grad_norm": 0.3790382146835327, "learning_rate": 4.493507091125269e-06, "loss": 0.3541, "step": 3735 }, { "epoch": 1.7374050534800807, "grad_norm": 0.39466914534568787, "learning_rate": 4.490815147756915e-06, "loss": 0.3831, "step": 3736 }, { "epoch": 1.7378700976592776, "grad_norm": 0.4218112528324127, "learning_rate": 4.4881233535205345e-06, "loss": 0.3405, "step": 3737 }, { "epoch": 1.7383351418384747, "grad_norm": 0.41723906993865967, "learning_rate": 4.4854317092045085e-06, "loss": 0.37, "step": 3738 }, { "epoch": 1.7388001860176718, "grad_norm": 0.3990018367767334, "learning_rate": 4.482740215597179e-06, "loss": 0.3546, "step": 3739 }, { "epoch": 1.7392652301968687, "grad_norm": 0.4663386046886444, "learning_rate": 4.480048873486836e-06, "loss": 0.3627, "step": 3740 }, { "epoch": 1.7397302743760656, "grad_norm": 0.36172690987586975, "learning_rate": 4.477357683661734e-06, "loss": 0.2987, "step": 3741 }, { "epoch": 1.7401953185552628, "grad_norm": 0.3599375784397125, "learning_rate": 4.474666646910074e-06, "loss": 0.3212, "step": 3742 }, { "epoch": 1.74066036273446, "grad_norm": 0.43623727560043335, "learning_rate": 4.471975764020023e-06, "loss": 0.3696, "step": 3743 }, { "epoch": 1.7411254069136568, "grad_norm": 0.40363821387290955, "learning_rate": 4.469285035779693e-06, "loss": 0.3287, "step": 3744 }, { "epoch": 1.7415904510928537, "grad_norm": 0.40188169479370117, "learning_rate": 4.466594462977156e-06, "loss": 0.3471, "step": 3745 }, { "epoch": 1.7420554952720508, "grad_norm": 0.4021664261817932, "learning_rate": 4.463904046400438e-06, "loss": 0.3482, "step": 3746 }, { "epoch": 1.742520539451248, "grad_norm": 0.3717334270477295, "learning_rate": 4.4612137868375136e-06, "loss": 0.3174, "step": 3747 }, { "epoch": 1.7429855836304449, "grad_norm": 0.4289903938770294, "learning_rate": 4.458523685076321e-06, "loss": 0.3474, "step": 3748 }, { "epoch": 1.7434506278096418, "grad_norm": 0.44443613290786743, "learning_rate": 4.455833741904746e-06, "loss": 0.3668, "step": 3749 }, { "epoch": 1.743915671988839, "grad_norm": 0.36988234519958496, "learning_rate": 4.4531439581106295e-06, "loss": 0.3533, "step": 3750 }, { "epoch": 1.744380716168036, "grad_norm": 0.4682080149650574, "learning_rate": 4.450454334481763e-06, "loss": 0.3772, "step": 3751 }, { "epoch": 1.744845760347233, "grad_norm": 0.4056577682495117, "learning_rate": 4.447764871805899e-06, "loss": 0.3237, "step": 3752 }, { "epoch": 1.74531080452643, "grad_norm": 0.4110541045665741, "learning_rate": 4.4450755708707305e-06, "loss": 0.3519, "step": 3753 }, { "epoch": 1.7457758487056272, "grad_norm": 0.39790642261505127, "learning_rate": 4.442386432463915e-06, "loss": 0.3489, "step": 3754 }, { "epoch": 1.746240892884824, "grad_norm": 0.3980329632759094, "learning_rate": 4.439697457373055e-06, "loss": 0.3612, "step": 3755 }, { "epoch": 1.746705937064021, "grad_norm": 0.3943917751312256, "learning_rate": 4.437008646385711e-06, "loss": 0.3486, "step": 3756 }, { "epoch": 1.7471709812432181, "grad_norm": 0.34319841861724854, "learning_rate": 4.434320000289387e-06, "loss": 0.3213, "step": 3757 }, { "epoch": 1.7476360254224153, "grad_norm": 0.4046364724636078, "learning_rate": 4.431631519871549e-06, "loss": 0.343, "step": 3758 }, { "epoch": 1.7481010696016122, "grad_norm": 0.4628947377204895, "learning_rate": 4.428943205919605e-06, "loss": 0.3734, "step": 3759 }, { "epoch": 1.748566113780809, "grad_norm": 0.3862752914428711, "learning_rate": 4.426255059220921e-06, "loss": 0.3305, "step": 3760 }, { "epoch": 1.7490311579600062, "grad_norm": 0.37330740690231323, "learning_rate": 4.42356708056281e-06, "loss": 0.3254, "step": 3761 }, { "epoch": 1.7494962021392033, "grad_norm": 0.4342025816440582, "learning_rate": 4.420879270732539e-06, "loss": 0.3663, "step": 3762 }, { "epoch": 1.7499612463184002, "grad_norm": 0.42904359102249146, "learning_rate": 4.418191630517322e-06, "loss": 0.3377, "step": 3763 }, { "epoch": 1.7504262904975971, "grad_norm": 0.40578049421310425, "learning_rate": 4.415504160704327e-06, "loss": 0.3785, "step": 3764 }, { "epoch": 1.7508913346767943, "grad_norm": 0.34141576290130615, "learning_rate": 4.412816862080668e-06, "loss": 0.2916, "step": 3765 }, { "epoch": 1.7513563788559914, "grad_norm": 0.4625285565853119, "learning_rate": 4.4101297354334135e-06, "loss": 0.3609, "step": 3766 }, { "epoch": 1.7518214230351883, "grad_norm": 0.40238505601882935, "learning_rate": 4.407442781549577e-06, "loss": 0.3996, "step": 3767 }, { "epoch": 1.7522864672143854, "grad_norm": 0.40571123361587524, "learning_rate": 4.404756001216126e-06, "loss": 0.3533, "step": 3768 }, { "epoch": 1.7527515113935825, "grad_norm": 0.37566980719566345, "learning_rate": 4.4020693952199726e-06, "loss": 0.3394, "step": 3769 }, { "epoch": 1.7532165555727794, "grad_norm": 0.4016037583351135, "learning_rate": 4.3993829643479825e-06, "loss": 0.324, "step": 3770 }, { "epoch": 1.7536815997519763, "grad_norm": 0.41683268547058105, "learning_rate": 4.396696709386964e-06, "loss": 0.3341, "step": 3771 }, { "epoch": 1.7541466439311735, "grad_norm": 0.3792789876461029, "learning_rate": 4.394010631123681e-06, "loss": 0.3193, "step": 3772 }, { "epoch": 1.7546116881103706, "grad_norm": 0.38343918323516846, "learning_rate": 4.39132473034484e-06, "loss": 0.3722, "step": 3773 }, { "epoch": 1.7550767322895675, "grad_norm": 0.3963862359523773, "learning_rate": 4.388639007837101e-06, "loss": 0.3609, "step": 3774 }, { "epoch": 1.7555417764687644, "grad_norm": 0.3763202428817749, "learning_rate": 4.385953464387064e-06, "loss": 0.3183, "step": 3775 }, { "epoch": 1.7560068206479615, "grad_norm": 0.3931805491447449, "learning_rate": 4.383268100781285e-06, "loss": 0.3821, "step": 3776 }, { "epoch": 1.7564718648271587, "grad_norm": 0.3617267906665802, "learning_rate": 4.38058291780626e-06, "loss": 0.3264, "step": 3777 }, { "epoch": 1.7569369090063556, "grad_norm": 0.38972967863082886, "learning_rate": 4.377897916248438e-06, "loss": 0.3538, "step": 3778 }, { "epoch": 1.7574019531855525, "grad_norm": 0.4271766245365143, "learning_rate": 4.37521309689421e-06, "loss": 0.3718, "step": 3779 }, { "epoch": 1.7578669973647496, "grad_norm": 0.3271983861923218, "learning_rate": 4.37252846052992e-06, "loss": 0.3391, "step": 3780 }, { "epoch": 1.7583320415439467, "grad_norm": 0.41087400913238525, "learning_rate": 4.36984400794185e-06, "loss": 0.3266, "step": 3781 }, { "epoch": 1.7587970857231436, "grad_norm": 0.44612687826156616, "learning_rate": 4.367159739916236e-06, "loss": 0.334, "step": 3782 }, { "epoch": 1.7592621299023408, "grad_norm": 0.3791424632072449, "learning_rate": 4.364475657239253e-06, "loss": 0.3684, "step": 3783 }, { "epoch": 1.759727174081538, "grad_norm": 0.4565580189228058, "learning_rate": 4.361791760697027e-06, "loss": 0.3547, "step": 3784 }, { "epoch": 1.7601922182607348, "grad_norm": 0.48362359404563904, "learning_rate": 4.35910805107563e-06, "loss": 0.3483, "step": 3785 }, { "epoch": 1.7606572624399317, "grad_norm": 0.37739720940589905, "learning_rate": 4.356424529161072e-06, "loss": 0.351, "step": 3786 }, { "epoch": 1.7611223066191288, "grad_norm": 0.36957481503486633, "learning_rate": 4.353741195739318e-06, "loss": 0.362, "step": 3787 }, { "epoch": 1.761587350798326, "grad_norm": 0.39713501930236816, "learning_rate": 4.351058051596269e-06, "loss": 0.3421, "step": 3788 }, { "epoch": 1.7620523949775229, "grad_norm": 0.37897831201553345, "learning_rate": 4.348375097517776e-06, "loss": 0.3196, "step": 3789 }, { "epoch": 1.7625174391567198, "grad_norm": 0.38132160902023315, "learning_rate": 4.345692334289632e-06, "loss": 0.3764, "step": 3790 }, { "epoch": 1.762982483335917, "grad_norm": 0.39471349120140076, "learning_rate": 4.343009762697577e-06, "loss": 0.3472, "step": 3791 }, { "epoch": 1.763447527515114, "grad_norm": 0.35921382904052734, "learning_rate": 4.340327383527289e-06, "loss": 0.3489, "step": 3792 }, { "epoch": 1.763912571694311, "grad_norm": 0.3691664934158325, "learning_rate": 4.337645197564398e-06, "loss": 0.3357, "step": 3793 }, { "epoch": 1.7643776158735078, "grad_norm": 0.38761186599731445, "learning_rate": 4.334963205594467e-06, "loss": 0.3535, "step": 3794 }, { "epoch": 1.764842660052705, "grad_norm": 0.4036206305027008, "learning_rate": 4.332281408403011e-06, "loss": 0.3786, "step": 3795 }, { "epoch": 1.765307704231902, "grad_norm": 0.37231314182281494, "learning_rate": 4.3295998067754844e-06, "loss": 0.3068, "step": 3796 }, { "epoch": 1.765772748411099, "grad_norm": 0.38186076283454895, "learning_rate": 4.326918401497287e-06, "loss": 0.334, "step": 3797 }, { "epoch": 1.7662377925902961, "grad_norm": 0.4085099697113037, "learning_rate": 4.3242371933537554e-06, "loss": 0.3273, "step": 3798 }, { "epoch": 1.7667028367694932, "grad_norm": 0.4823872447013855, "learning_rate": 4.321556183130175e-06, "loss": 0.3616, "step": 3799 }, { "epoch": 1.7671678809486902, "grad_norm": 0.38693398237228394, "learning_rate": 4.318875371611766e-06, "loss": 0.3751, "step": 3800 }, { "epoch": 1.767632925127887, "grad_norm": 0.4294361472129822, "learning_rate": 4.3161947595836985e-06, "loss": 0.3404, "step": 3801 }, { "epoch": 1.7680979693070842, "grad_norm": 0.46202561259269714, "learning_rate": 4.313514347831077e-06, "loss": 0.3245, "step": 3802 }, { "epoch": 1.7685630134862813, "grad_norm": 0.42043882608413696, "learning_rate": 4.310834137138956e-06, "loss": 0.3401, "step": 3803 }, { "epoch": 1.7690280576654782, "grad_norm": 0.4185798168182373, "learning_rate": 4.308154128292318e-06, "loss": 0.359, "step": 3804 }, { "epoch": 1.7694931018446751, "grad_norm": 0.44950559735298157, "learning_rate": 4.305474322076102e-06, "loss": 0.3354, "step": 3805 }, { "epoch": 1.7699581460238722, "grad_norm": 0.4571090638637543, "learning_rate": 4.302794719275173e-06, "loss": 0.3484, "step": 3806 }, { "epoch": 1.7704231902030694, "grad_norm": 0.42826327681541443, "learning_rate": 4.300115320674346e-06, "loss": 0.3595, "step": 3807 }, { "epoch": 1.7708882343822663, "grad_norm": 0.42987918853759766, "learning_rate": 4.297436127058373e-06, "loss": 0.3028, "step": 3808 }, { "epoch": 1.7713532785614632, "grad_norm": 0.4052066206932068, "learning_rate": 4.294757139211948e-06, "loss": 0.3719, "step": 3809 }, { "epoch": 1.7718183227406603, "grad_norm": 0.4018250107765198, "learning_rate": 4.292078357919701e-06, "loss": 0.3937, "step": 3810 }, { "epoch": 1.7722833669198574, "grad_norm": 0.40564820170402527, "learning_rate": 4.289399783966205e-06, "loss": 0.344, "step": 3811 }, { "epoch": 1.7727484110990543, "grad_norm": 0.4192334711551666, "learning_rate": 4.286721418135968e-06, "loss": 0.3571, "step": 3812 }, { "epoch": 1.7732134552782515, "grad_norm": 0.36322298645973206, "learning_rate": 4.284043261213442e-06, "loss": 0.3576, "step": 3813 }, { "epoch": 1.7736784994574486, "grad_norm": 0.35039764642715454, "learning_rate": 4.281365313983016e-06, "loss": 0.3473, "step": 3814 }, { "epoch": 1.7741435436366455, "grad_norm": 0.3839752972126007, "learning_rate": 4.278687577229018e-06, "loss": 0.3502, "step": 3815 }, { "epoch": 1.7746085878158424, "grad_norm": 0.4036378264427185, "learning_rate": 4.2760100517357095e-06, "loss": 0.3351, "step": 3816 }, { "epoch": 1.7750736319950395, "grad_norm": 0.3815009891986847, "learning_rate": 4.273332738287299e-06, "loss": 0.3496, "step": 3817 }, { "epoch": 1.7755386761742367, "grad_norm": 0.38633623719215393, "learning_rate": 4.270655637667926e-06, "loss": 0.3215, "step": 3818 }, { "epoch": 1.7760037203534336, "grad_norm": 0.39606183767318726, "learning_rate": 4.267978750661669e-06, "loss": 0.3445, "step": 3819 }, { "epoch": 1.7764687645326305, "grad_norm": 0.39145374298095703, "learning_rate": 4.265302078052546e-06, "loss": 0.3274, "step": 3820 }, { "epoch": 1.7769338087118276, "grad_norm": 0.37993159890174866, "learning_rate": 4.26262562062451e-06, "loss": 0.3546, "step": 3821 }, { "epoch": 1.7773988528910247, "grad_norm": 0.41166287660598755, "learning_rate": 4.259949379161454e-06, "loss": 0.3559, "step": 3822 }, { "epoch": 1.7778638970702216, "grad_norm": 0.4474835991859436, "learning_rate": 4.2572733544472025e-06, "loss": 0.3525, "step": 3823 }, { "epoch": 1.7783289412494185, "grad_norm": 0.38438984751701355, "learning_rate": 4.2545975472655215e-06, "loss": 0.3461, "step": 3824 }, { "epoch": 1.7787939854286157, "grad_norm": 0.37471044063568115, "learning_rate": 4.2519219584001106e-06, "loss": 0.3681, "step": 3825 }, { "epoch": 1.7792590296078128, "grad_norm": 0.3626500368118286, "learning_rate": 4.249246588634609e-06, "loss": 0.334, "step": 3826 }, { "epoch": 1.7797240737870097, "grad_norm": 0.34971731901168823, "learning_rate": 4.246571438752585e-06, "loss": 0.3683, "step": 3827 }, { "epoch": 1.7801891179662068, "grad_norm": 0.37381622195243835, "learning_rate": 4.243896509537551e-06, "loss": 0.3602, "step": 3828 }, { "epoch": 1.780654162145404, "grad_norm": 0.4181731343269348, "learning_rate": 4.241221801772945e-06, "loss": 0.3684, "step": 3829 }, { "epoch": 1.7811192063246009, "grad_norm": 0.3543993830680847, "learning_rate": 4.238547316242149e-06, "loss": 0.3383, "step": 3830 }, { "epoch": 1.7815842505037978, "grad_norm": 0.4083957076072693, "learning_rate": 4.235873053728475e-06, "loss": 0.372, "step": 3831 }, { "epoch": 1.7820492946829949, "grad_norm": 0.3555138409137726, "learning_rate": 4.2331990150151745e-06, "loss": 0.351, "step": 3832 }, { "epoch": 1.782514338862192, "grad_norm": 0.40743115544319153, "learning_rate": 4.230525200885425e-06, "loss": 0.3407, "step": 3833 }, { "epoch": 1.782979383041389, "grad_norm": 0.435773104429245, "learning_rate": 4.227851612122347e-06, "loss": 0.3665, "step": 3834 }, { "epoch": 1.7834444272205858, "grad_norm": 0.3714430332183838, "learning_rate": 4.225178249508988e-06, "loss": 0.3557, "step": 3835 }, { "epoch": 1.783909471399783, "grad_norm": 0.35675156116485596, "learning_rate": 4.222505113828335e-06, "loss": 0.3125, "step": 3836 }, { "epoch": 1.78437451557898, "grad_norm": 0.3813643753528595, "learning_rate": 4.219832205863303e-06, "loss": 0.3653, "step": 3837 }, { "epoch": 1.784839559758177, "grad_norm": 0.4000234603881836, "learning_rate": 4.217159526396749e-06, "loss": 0.3811, "step": 3838 }, { "epoch": 1.785304603937374, "grad_norm": 0.3764249086380005, "learning_rate": 4.214487076211452e-06, "loss": 0.2882, "step": 3839 }, { "epoch": 1.7857696481165712, "grad_norm": 0.47971102595329285, "learning_rate": 4.2118148560901325e-06, "loss": 0.3693, "step": 3840 }, { "epoch": 1.7862346922957681, "grad_norm": 0.3611883521080017, "learning_rate": 4.209142866815438e-06, "loss": 0.3483, "step": 3841 }, { "epoch": 1.786699736474965, "grad_norm": 0.3263775110244751, "learning_rate": 4.206471109169952e-06, "loss": 0.3338, "step": 3842 }, { "epoch": 1.7871647806541622, "grad_norm": 0.4258721172809601, "learning_rate": 4.2037995839361876e-06, "loss": 0.3556, "step": 3843 }, { "epoch": 1.7876298248333593, "grad_norm": 0.43471091985702515, "learning_rate": 4.201128291896594e-06, "loss": 0.349, "step": 3844 }, { "epoch": 1.7880948690125562, "grad_norm": 0.4108128249645233, "learning_rate": 4.198457233833546e-06, "loss": 0.3643, "step": 3845 }, { "epoch": 1.7885599131917531, "grad_norm": 0.3940763771533966, "learning_rate": 4.195786410529357e-06, "loss": 0.3659, "step": 3846 }, { "epoch": 1.7890249573709502, "grad_norm": 0.4194324314594269, "learning_rate": 4.193115822766263e-06, "loss": 0.3359, "step": 3847 }, { "epoch": 1.7894900015501474, "grad_norm": 0.37528660893440247, "learning_rate": 4.19044547132644e-06, "loss": 0.3255, "step": 3848 }, { "epoch": 1.7899550457293443, "grad_norm": 0.4335568845272064, "learning_rate": 4.1877753569919865e-06, "loss": 0.3884, "step": 3849 }, { "epoch": 1.7904200899085412, "grad_norm": 0.41178300976753235, "learning_rate": 4.18510548054494e-06, "loss": 0.3179, "step": 3850 }, { "epoch": 1.7908851340877383, "grad_norm": 0.41776037216186523, "learning_rate": 4.18243584276726e-06, "loss": 0.3448, "step": 3851 }, { "epoch": 1.7913501782669354, "grad_norm": 0.40628287196159363, "learning_rate": 4.179766444440844e-06, "loss": 0.3959, "step": 3852 }, { "epoch": 1.7918152224461323, "grad_norm": 0.37497735023498535, "learning_rate": 4.177097286347511e-06, "loss": 0.3118, "step": 3853 }, { "epoch": 1.7922802666253292, "grad_norm": 0.4827350378036499, "learning_rate": 4.174428369269018e-06, "loss": 0.3839, "step": 3854 }, { "epoch": 1.7927453108045266, "grad_norm": 0.3956352472305298, "learning_rate": 4.171759693987046e-06, "loss": 0.3378, "step": 3855 }, { "epoch": 1.7932103549837235, "grad_norm": 0.3712174892425537, "learning_rate": 4.169091261283205e-06, "loss": 0.3207, "step": 3856 }, { "epoch": 1.7936753991629204, "grad_norm": 0.48411279916763306, "learning_rate": 4.166423071939038e-06, "loss": 0.3747, "step": 3857 }, { "epoch": 1.7941404433421175, "grad_norm": 0.38150522112846375, "learning_rate": 4.163755126736011e-06, "loss": 0.3312, "step": 3858 }, { "epoch": 1.7946054875213147, "grad_norm": 0.3953656554222107, "learning_rate": 4.1610874264555265e-06, "loss": 0.3349, "step": 3859 }, { "epoch": 1.7950705317005116, "grad_norm": 0.4521973431110382, "learning_rate": 4.158419971878907e-06, "loss": 0.3255, "step": 3860 }, { "epoch": 1.7955355758797085, "grad_norm": 0.3754480481147766, "learning_rate": 4.155752763787409e-06, "loss": 0.3399, "step": 3861 }, { "epoch": 1.7960006200589056, "grad_norm": 0.36341235041618347, "learning_rate": 4.1530858029622125e-06, "loss": 0.3306, "step": 3862 }, { "epoch": 1.7964656642381027, "grad_norm": 0.5212793946266174, "learning_rate": 4.150419090184428e-06, "loss": 0.4108, "step": 3863 }, { "epoch": 1.7969307084172996, "grad_norm": 0.37436044216156006, "learning_rate": 4.147752626235092e-06, "loss": 0.3122, "step": 3864 }, { "epoch": 1.7973957525964965, "grad_norm": 0.3866221606731415, "learning_rate": 4.145086411895168e-06, "loss": 0.3834, "step": 3865 }, { "epoch": 1.7978607967756937, "grad_norm": 0.42558223009109497, "learning_rate": 4.142420447945548e-06, "loss": 0.4011, "step": 3866 }, { "epoch": 1.7983258409548908, "grad_norm": 0.3522246479988098, "learning_rate": 4.13975473516705e-06, "loss": 0.2951, "step": 3867 }, { "epoch": 1.7987908851340877, "grad_norm": 0.37685632705688477, "learning_rate": 4.137089274340415e-06, "loss": 0.3199, "step": 3868 }, { "epoch": 1.7992559293132846, "grad_norm": 0.3827194571495056, "learning_rate": 4.134424066246318e-06, "loss": 0.3436, "step": 3869 }, { "epoch": 1.799720973492482, "grad_norm": 0.4337718188762665, "learning_rate": 4.131759111665349e-06, "loss": 0.3462, "step": 3870 }, { "epoch": 1.8001860176716789, "grad_norm": 0.40320268273353577, "learning_rate": 4.129094411378034e-06, "loss": 0.3414, "step": 3871 }, { "epoch": 1.8006510618508758, "grad_norm": 0.37748897075653076, "learning_rate": 4.1264299661648195e-06, "loss": 0.3365, "step": 3872 }, { "epoch": 1.8011161060300729, "grad_norm": 0.3657347857952118, "learning_rate": 4.123765776806081e-06, "loss": 0.339, "step": 3873 }, { "epoch": 1.80158115020927, "grad_norm": 0.4097522497177124, "learning_rate": 4.121101844082111e-06, "loss": 0.3451, "step": 3874 }, { "epoch": 1.802046194388467, "grad_norm": 0.3985246419906616, "learning_rate": 4.118438168773137e-06, "loss": 0.3597, "step": 3875 }, { "epoch": 1.8025112385676638, "grad_norm": 0.41197559237480164, "learning_rate": 4.115774751659302e-06, "loss": 0.3268, "step": 3876 }, { "epoch": 1.802976282746861, "grad_norm": 0.3859005272388458, "learning_rate": 4.11311159352068e-06, "loss": 0.3354, "step": 3877 }, { "epoch": 1.803441326926058, "grad_norm": 0.3585662841796875, "learning_rate": 4.110448695137266e-06, "loss": 0.3303, "step": 3878 }, { "epoch": 1.803906371105255, "grad_norm": 0.38057127594947815, "learning_rate": 4.107786057288982e-06, "loss": 0.3306, "step": 3879 }, { "epoch": 1.8043714152844519, "grad_norm": 0.4793699085712433, "learning_rate": 4.105123680755667e-06, "loss": 0.3414, "step": 3880 }, { "epoch": 1.804836459463649, "grad_norm": 0.4067707061767578, "learning_rate": 4.102461566317093e-06, "loss": 0.322, "step": 3881 }, { "epoch": 1.8053015036428461, "grad_norm": 0.4255411922931671, "learning_rate": 4.099799714752944e-06, "loss": 0.296, "step": 3882 }, { "epoch": 1.805766547822043, "grad_norm": 0.40467318892478943, "learning_rate": 4.097138126842839e-06, "loss": 0.3672, "step": 3883 }, { "epoch": 1.8062315920012402, "grad_norm": 0.41983935236930847, "learning_rate": 4.09447680336631e-06, "loss": 0.3322, "step": 3884 }, { "epoch": 1.8066966361804373, "grad_norm": 0.37576842308044434, "learning_rate": 4.091815745102818e-06, "loss": 0.3064, "step": 3885 }, { "epoch": 1.8071616803596342, "grad_norm": 0.44206252694129944, "learning_rate": 4.089154952831741e-06, "loss": 0.3848, "step": 3886 }, { "epoch": 1.807626724538831, "grad_norm": 0.3728598356246948, "learning_rate": 4.086494427332386e-06, "loss": 0.3392, "step": 3887 }, { "epoch": 1.8080917687180282, "grad_norm": 0.41656002402305603, "learning_rate": 4.083834169383972e-06, "loss": 0.3432, "step": 3888 }, { "epoch": 1.8085568128972254, "grad_norm": 0.40540826320648193, "learning_rate": 4.0811741797656505e-06, "loss": 0.3381, "step": 3889 }, { "epoch": 1.8090218570764223, "grad_norm": 0.40355539321899414, "learning_rate": 4.078514459256485e-06, "loss": 0.3464, "step": 3890 }, { "epoch": 1.8094869012556192, "grad_norm": 0.37891075015068054, "learning_rate": 4.075855008635468e-06, "loss": 0.3557, "step": 3891 }, { "epoch": 1.8099519454348163, "grad_norm": 0.35543182492256165, "learning_rate": 4.073195828681509e-06, "loss": 0.3177, "step": 3892 }, { "epoch": 1.8104169896140134, "grad_norm": 0.400921106338501, "learning_rate": 4.070536920173435e-06, "loss": 0.3601, "step": 3893 }, { "epoch": 1.8108820337932103, "grad_norm": 0.35732823610305786, "learning_rate": 4.067878283890002e-06, "loss": 0.345, "step": 3894 }, { "epoch": 1.8113470779724072, "grad_norm": 0.39798301458358765, "learning_rate": 4.065219920609877e-06, "loss": 0.3464, "step": 3895 }, { "epoch": 1.8118121221516044, "grad_norm": 0.3960334062576294, "learning_rate": 4.062561831111656e-06, "loss": 0.3632, "step": 3896 }, { "epoch": 1.8122771663308015, "grad_norm": 0.3809148669242859, "learning_rate": 4.059904016173844e-06, "loss": 0.3482, "step": 3897 }, { "epoch": 1.8127422105099984, "grad_norm": 0.397305965423584, "learning_rate": 4.05724647657488e-06, "loss": 0.3559, "step": 3898 }, { "epoch": 1.8132072546891955, "grad_norm": 0.3920312225818634, "learning_rate": 4.0545892130931065e-06, "loss": 0.3402, "step": 3899 }, { "epoch": 1.8136722988683927, "grad_norm": 0.39747723937034607, "learning_rate": 4.051932226506797e-06, "loss": 0.3551, "step": 3900 }, { "epoch": 1.8141373430475896, "grad_norm": 0.4466649889945984, "learning_rate": 4.049275517594137e-06, "loss": 0.3301, "step": 3901 }, { "epoch": 1.8146023872267865, "grad_norm": 0.413919597864151, "learning_rate": 4.046619087133238e-06, "loss": 0.3609, "step": 3902 }, { "epoch": 1.8150674314059836, "grad_norm": 0.34546610713005066, "learning_rate": 4.04396293590212e-06, "loss": 0.3098, "step": 3903 }, { "epoch": 1.8155324755851807, "grad_norm": 0.40819764137268066, "learning_rate": 4.0413070646787325e-06, "loss": 0.3451, "step": 3904 }, { "epoch": 1.8159975197643776, "grad_norm": 0.3749530017375946, "learning_rate": 4.03865147424093e-06, "loss": 0.3456, "step": 3905 }, { "epoch": 1.8164625639435745, "grad_norm": 0.3659387528896332, "learning_rate": 4.035996165366497e-06, "loss": 0.3042, "step": 3906 }, { "epoch": 1.8169276081227717, "grad_norm": 0.41476431488990784, "learning_rate": 4.033341138833127e-06, "loss": 0.3314, "step": 3907 }, { "epoch": 1.8173926523019688, "grad_norm": 0.39919087290763855, "learning_rate": 4.030686395418439e-06, "loss": 0.3424, "step": 3908 }, { "epoch": 1.8178576964811657, "grad_norm": 0.40032151341438293, "learning_rate": 4.028031935899958e-06, "loss": 0.3332, "step": 3909 }, { "epoch": 1.8183227406603626, "grad_norm": 0.41616636514663696, "learning_rate": 4.025377761055138e-06, "loss": 0.3357, "step": 3910 }, { "epoch": 1.8187877848395597, "grad_norm": 0.40295374393463135, "learning_rate": 4.022723871661338e-06, "loss": 0.3281, "step": 3911 }, { "epoch": 1.8192528290187568, "grad_norm": 0.49246686697006226, "learning_rate": 4.020070268495844e-06, "loss": 0.3867, "step": 3912 }, { "epoch": 1.8197178731979538, "grad_norm": 0.38608917593955994, "learning_rate": 4.017416952335849e-06, "loss": 0.3213, "step": 3913 }, { "epoch": 1.8201829173771509, "grad_norm": 0.3529578745365143, "learning_rate": 4.014763923958471e-06, "loss": 0.3246, "step": 3914 }, { "epoch": 1.820647961556348, "grad_norm": 0.42414984107017517, "learning_rate": 4.0121111841407345e-06, "loss": 0.3668, "step": 3915 }, { "epoch": 1.821113005735545, "grad_norm": 0.406495064496994, "learning_rate": 4.0094587336595875e-06, "loss": 0.3176, "step": 3916 }, { "epoch": 1.8215780499147418, "grad_norm": 0.39914000034332275, "learning_rate": 4.006806573291886e-06, "loss": 0.343, "step": 3917 }, { "epoch": 1.822043094093939, "grad_norm": 0.3730376958847046, "learning_rate": 4.004154703814407e-06, "loss": 0.2956, "step": 3918 }, { "epoch": 1.822508138273136, "grad_norm": 0.43556198477745056, "learning_rate": 4.00150312600384e-06, "loss": 0.3393, "step": 3919 }, { "epoch": 1.822973182452333, "grad_norm": 0.3719125986099243, "learning_rate": 3.998851840636789e-06, "loss": 0.3421, "step": 3920 }, { "epoch": 1.8234382266315299, "grad_norm": 0.40865078568458557, "learning_rate": 3.996200848489771e-06, "loss": 0.3597, "step": 3921 }, { "epoch": 1.823903270810727, "grad_norm": 0.38333582878112793, "learning_rate": 3.9935501503392214e-06, "loss": 0.3604, "step": 3922 }, { "epoch": 1.8243683149899241, "grad_norm": 0.40074682235717773, "learning_rate": 3.990899746961483e-06, "loss": 0.3406, "step": 3923 }, { "epoch": 1.824833359169121, "grad_norm": 0.4388389587402344, "learning_rate": 3.9882496391328185e-06, "loss": 0.3327, "step": 3924 }, { "epoch": 1.825298403348318, "grad_norm": 0.40420886874198914, "learning_rate": 3.9855998276294e-06, "loss": 0.3187, "step": 3925 }, { "epoch": 1.825763447527515, "grad_norm": 0.4143049418926239, "learning_rate": 3.982950313227317e-06, "loss": 0.3311, "step": 3926 }, { "epoch": 1.8262284917067122, "grad_norm": 0.4336578845977783, "learning_rate": 3.980301096702567e-06, "loss": 0.3923, "step": 3927 }, { "epoch": 1.826693535885909, "grad_norm": 0.395292729139328, "learning_rate": 3.9776521788310605e-06, "loss": 0.3403, "step": 3928 }, { "epoch": 1.8271585800651062, "grad_norm": 0.38078877329826355, "learning_rate": 3.975003560388625e-06, "loss": 0.3363, "step": 3929 }, { "epoch": 1.8276236242443034, "grad_norm": 0.375010222196579, "learning_rate": 3.9723552421509975e-06, "loss": 0.3441, "step": 3930 }, { "epoch": 1.8280886684235003, "grad_norm": 0.3816002607345581, "learning_rate": 3.969707224893829e-06, "loss": 0.3388, "step": 3931 }, { "epoch": 1.8285537126026972, "grad_norm": 0.3783058524131775, "learning_rate": 3.967059509392677e-06, "loss": 0.3584, "step": 3932 }, { "epoch": 1.8290187567818943, "grad_norm": 0.4086306095123291, "learning_rate": 3.964412096423019e-06, "loss": 0.3437, "step": 3933 }, { "epoch": 1.8294838009610914, "grad_norm": 0.3575378954410553, "learning_rate": 3.961764986760234e-06, "loss": 0.3245, "step": 3934 }, { "epoch": 1.8299488451402883, "grad_norm": 0.4207441806793213, "learning_rate": 3.959118181179622e-06, "loss": 0.3375, "step": 3935 }, { "epoch": 1.8304138893194852, "grad_norm": 0.39432263374328613, "learning_rate": 3.9564716804563855e-06, "loss": 0.3562, "step": 3936 }, { "epoch": 1.8308789334986824, "grad_norm": 0.3997967541217804, "learning_rate": 3.9538254853656465e-06, "loss": 0.3544, "step": 3937 }, { "epoch": 1.8313439776778795, "grad_norm": 0.369190514087677, "learning_rate": 3.951179596682427e-06, "loss": 0.3524, "step": 3938 }, { "epoch": 1.8318090218570764, "grad_norm": 0.37524935603141785, "learning_rate": 3.948534015181671e-06, "loss": 0.3549, "step": 3939 }, { "epoch": 1.8322740660362733, "grad_norm": 0.36414414644241333, "learning_rate": 3.945888741638219e-06, "loss": 0.3104, "step": 3940 }, { "epoch": 1.8327391102154704, "grad_norm": 0.3788766860961914, "learning_rate": 3.943243776826834e-06, "loss": 0.3434, "step": 3941 }, { "epoch": 1.8332041543946676, "grad_norm": 0.3915799558162689, "learning_rate": 3.94059912152218e-06, "loss": 0.3332, "step": 3942 }, { "epoch": 1.8336691985738645, "grad_norm": 0.4084991216659546, "learning_rate": 3.937954776498839e-06, "loss": 0.356, "step": 3943 }, { "epoch": 1.8341342427530616, "grad_norm": 0.38015294075012207, "learning_rate": 3.93531074253129e-06, "loss": 0.3525, "step": 3944 }, { "epoch": 1.8345992869322587, "grad_norm": 0.3744085133075714, "learning_rate": 3.932667020393933e-06, "loss": 0.3628, "step": 3945 }, { "epoch": 1.8350643311114556, "grad_norm": 0.32951560616493225, "learning_rate": 3.930023610861067e-06, "loss": 0.3227, "step": 3946 }, { "epoch": 1.8355293752906525, "grad_norm": 0.3684207499027252, "learning_rate": 3.927380514706906e-06, "loss": 0.3635, "step": 3947 }, { "epoch": 1.8359944194698496, "grad_norm": 0.34596481919288635, "learning_rate": 3.924737732705568e-06, "loss": 0.3528, "step": 3948 }, { "epoch": 1.8364594636490468, "grad_norm": 0.3639676868915558, "learning_rate": 3.9220952656310855e-06, "loss": 0.3836, "step": 3949 }, { "epoch": 1.8369245078282437, "grad_norm": 0.34727898240089417, "learning_rate": 3.919453114257389e-06, "loss": 0.31, "step": 3950 }, { "epoch": 1.8373895520074406, "grad_norm": 0.3681538701057434, "learning_rate": 3.916811279358326e-06, "loss": 0.3589, "step": 3951 }, { "epoch": 1.8378545961866377, "grad_norm": 0.41221883893013, "learning_rate": 3.9141697617076414e-06, "loss": 0.3592, "step": 3952 }, { "epoch": 1.8383196403658348, "grad_norm": 0.3793887794017792, "learning_rate": 3.911528562078999e-06, "loss": 0.3131, "step": 3953 }, { "epoch": 1.8387846845450317, "grad_norm": 0.3928253650665283, "learning_rate": 3.9088876812459585e-06, "loss": 0.3305, "step": 3954 }, { "epoch": 1.8392497287242286, "grad_norm": 0.4711274802684784, "learning_rate": 3.906247119981995e-06, "loss": 0.4163, "step": 3955 }, { "epoch": 1.8397147729034258, "grad_norm": 0.3746742606163025, "learning_rate": 3.903606879060483e-06, "loss": 0.3223, "step": 3956 }, { "epoch": 1.840179817082623, "grad_norm": 0.4172133207321167, "learning_rate": 3.900966959254709e-06, "loss": 0.3512, "step": 3957 }, { "epoch": 1.8406448612618198, "grad_norm": 0.3593921959400177, "learning_rate": 3.898327361337859e-06, "loss": 0.3413, "step": 3958 }, { "epoch": 1.841109905441017, "grad_norm": 0.40812554955482483, "learning_rate": 3.89568808608303e-06, "loss": 0.3318, "step": 3959 }, { "epoch": 1.841574949620214, "grad_norm": 0.42885205149650574, "learning_rate": 3.8930491342632235e-06, "loss": 0.3515, "step": 3960 }, { "epoch": 1.842039993799411, "grad_norm": 0.3699992597103119, "learning_rate": 3.890410506651346e-06, "loss": 0.3183, "step": 3961 }, { "epoch": 1.8425050379786079, "grad_norm": 0.44218167662620544, "learning_rate": 3.887772204020207e-06, "loss": 0.3596, "step": 3962 }, { "epoch": 1.842970082157805, "grad_norm": 0.38194793462753296, "learning_rate": 3.885134227142525e-06, "loss": 0.3587, "step": 3963 }, { "epoch": 1.8434351263370021, "grad_norm": 0.3831635117530823, "learning_rate": 3.882496576790918e-06, "loss": 0.3255, "step": 3964 }, { "epoch": 1.843900170516199, "grad_norm": 0.4331647455692291, "learning_rate": 3.879859253737911e-06, "loss": 0.3348, "step": 3965 }, { "epoch": 1.844365214695396, "grad_norm": 0.41714081168174744, "learning_rate": 3.8772222587559345e-06, "loss": 0.317, "step": 3966 }, { "epoch": 1.844830258874593, "grad_norm": 0.3952346444129944, "learning_rate": 3.8745855926173205e-06, "loss": 0.3647, "step": 3967 }, { "epoch": 1.8452953030537902, "grad_norm": 0.38905858993530273, "learning_rate": 3.871949256094308e-06, "loss": 0.3791, "step": 3968 }, { "epoch": 1.845760347232987, "grad_norm": 0.4492240250110626, "learning_rate": 3.869313249959033e-06, "loss": 0.3301, "step": 3969 }, { "epoch": 1.846225391412184, "grad_norm": 0.4257234036922455, "learning_rate": 3.866677574983542e-06, "loss": 0.3827, "step": 3970 }, { "epoch": 1.8466904355913811, "grad_norm": 0.3498407304286957, "learning_rate": 3.86404223193978e-06, "loss": 0.356, "step": 3971 }, { "epoch": 1.8471554797705783, "grad_norm": 0.35863253474235535, "learning_rate": 3.861407221599598e-06, "loss": 0.3364, "step": 3972 }, { "epoch": 1.8476205239497752, "grad_norm": 0.3656211495399475, "learning_rate": 3.858772544734745e-06, "loss": 0.3302, "step": 3973 }, { "epoch": 1.8480855681289723, "grad_norm": 0.46760740876197815, "learning_rate": 3.856138202116878e-06, "loss": 0.3454, "step": 3974 }, { "epoch": 1.8485506123081694, "grad_norm": 0.4070281684398651, "learning_rate": 3.853504194517551e-06, "loss": 0.3385, "step": 3975 }, { "epoch": 1.8490156564873663, "grad_norm": 0.3659380376338959, "learning_rate": 3.850870522708222e-06, "loss": 0.3412, "step": 3976 }, { "epoch": 1.8494807006665632, "grad_norm": 0.3773347735404968, "learning_rate": 3.848237187460252e-06, "loss": 0.3553, "step": 3977 }, { "epoch": 1.8499457448457604, "grad_norm": 0.4317549765110016, "learning_rate": 3.845604189544902e-06, "loss": 0.3417, "step": 3978 }, { "epoch": 1.8504107890249575, "grad_norm": 0.4554439187049866, "learning_rate": 3.842971529733333e-06, "loss": 0.3545, "step": 3979 }, { "epoch": 1.8508758332041544, "grad_norm": 0.4062388241291046, "learning_rate": 3.840339208796611e-06, "loss": 0.3467, "step": 3980 }, { "epoch": 1.8513408773833513, "grad_norm": 0.36868301033973694, "learning_rate": 3.837707227505696e-06, "loss": 0.3109, "step": 3981 }, { "epoch": 1.8518059215625484, "grad_norm": 0.4157978594303131, "learning_rate": 3.8350755866314555e-06, "loss": 0.3758, "step": 3982 }, { "epoch": 1.8522709657417455, "grad_norm": 0.4424905776977539, "learning_rate": 3.8324442869446525e-06, "loss": 0.3405, "step": 3983 }, { "epoch": 1.8527360099209425, "grad_norm": 0.3770042657852173, "learning_rate": 3.829813329215956e-06, "loss": 0.3587, "step": 3984 }, { "epoch": 1.8532010541001394, "grad_norm": 0.39475545287132263, "learning_rate": 3.827182714215925e-06, "loss": 0.3133, "step": 3985 }, { "epoch": 1.8536660982793365, "grad_norm": 0.48087528347969055, "learning_rate": 3.824552442715029e-06, "loss": 0.3636, "step": 3986 }, { "epoch": 1.8541311424585336, "grad_norm": 0.41496339440345764, "learning_rate": 3.821922515483627e-06, "loss": 0.3245, "step": 3987 }, { "epoch": 1.8545961866377305, "grad_norm": 0.38482680916786194, "learning_rate": 3.819292933291986e-06, "loss": 0.3443, "step": 3988 }, { "epoch": 1.8550612308169276, "grad_norm": 0.40696123242378235, "learning_rate": 3.8166636969102655e-06, "loss": 0.3456, "step": 3989 }, { "epoch": 1.8555262749961248, "grad_norm": 0.37776339054107666, "learning_rate": 3.814034807108529e-06, "loss": 0.3619, "step": 3990 }, { "epoch": 1.8559913191753217, "grad_norm": 0.40916046500205994, "learning_rate": 3.8114062646567317e-06, "loss": 0.3444, "step": 3991 }, { "epoch": 1.8564563633545186, "grad_norm": 0.3734774589538574, "learning_rate": 3.808778070324735e-06, "loss": 0.341, "step": 3992 }, { "epoch": 1.8569214075337157, "grad_norm": 0.38891860842704773, "learning_rate": 3.80615022488229e-06, "loss": 0.3358, "step": 3993 }, { "epoch": 1.8573864517129128, "grad_norm": 0.40861591696739197, "learning_rate": 3.803522729099054e-06, "loss": 0.3432, "step": 3994 }, { "epoch": 1.8578514958921097, "grad_norm": 0.45857205986976624, "learning_rate": 3.8008955837445742e-06, "loss": 0.3707, "step": 3995 }, { "epoch": 1.8583165400713066, "grad_norm": 0.38723862171173096, "learning_rate": 3.7982687895883036e-06, "loss": 0.3699, "step": 3996 }, { "epoch": 1.8587815842505038, "grad_norm": 0.37383538484573364, "learning_rate": 3.795642347399582e-06, "loss": 0.3368, "step": 3997 }, { "epoch": 1.859246628429701, "grad_norm": 0.38865286111831665, "learning_rate": 3.7930162579476566e-06, "loss": 0.3678, "step": 3998 }, { "epoch": 1.8597116726088978, "grad_norm": 0.42665407061576843, "learning_rate": 3.790390522001662e-06, "loss": 0.3673, "step": 3999 }, { "epoch": 1.8601767167880947, "grad_norm": 0.42146944999694824, "learning_rate": 3.787765140330636e-06, "loss": 0.3535, "step": 4000 }, { "epoch": 1.860641760967292, "grad_norm": 0.36168861389160156, "learning_rate": 3.7851401137035114e-06, "loss": 0.3267, "step": 4001 }, { "epoch": 1.861106805146489, "grad_norm": 0.4150577485561371, "learning_rate": 3.782515442889112e-06, "loss": 0.3572, "step": 4002 }, { "epoch": 1.8615718493256859, "grad_norm": 0.4027031362056732, "learning_rate": 3.7798911286561655e-06, "loss": 0.3416, "step": 4003 }, { "epoch": 1.862036893504883, "grad_norm": 0.4184225797653198, "learning_rate": 3.777267171773288e-06, "loss": 0.38, "step": 4004 }, { "epoch": 1.8625019376840801, "grad_norm": 0.3823773264884949, "learning_rate": 3.774643573008995e-06, "loss": 0.2838, "step": 4005 }, { "epoch": 1.862966981863277, "grad_norm": 0.4209512770175934, "learning_rate": 3.7720203331316946e-06, "loss": 0.3812, "step": 4006 }, { "epoch": 1.863432026042474, "grad_norm": 0.37146735191345215, "learning_rate": 3.769397452909695e-06, "loss": 0.3212, "step": 4007 }, { "epoch": 1.863897070221671, "grad_norm": 0.334895521402359, "learning_rate": 3.76677493311119e-06, "loss": 0.3148, "step": 4008 }, { "epoch": 1.8643621144008682, "grad_norm": 0.372598797082901, "learning_rate": 3.7641527745042784e-06, "loss": 0.3257, "step": 4009 }, { "epoch": 1.864827158580065, "grad_norm": 0.4964183270931244, "learning_rate": 3.7615309778569427e-06, "loss": 0.3659, "step": 4010 }, { "epoch": 1.865292202759262, "grad_norm": 0.37151283025741577, "learning_rate": 3.7589095439370676e-06, "loss": 0.3561, "step": 4011 }, { "epoch": 1.8657572469384591, "grad_norm": 0.43840986490249634, "learning_rate": 3.7562884735124273e-06, "loss": 0.3004, "step": 4012 }, { "epoch": 1.8662222911176563, "grad_norm": 0.4607786536216736, "learning_rate": 3.7536677673506926e-06, "loss": 0.4157, "step": 4013 }, { "epoch": 1.8666873352968532, "grad_norm": 0.3740638792514801, "learning_rate": 3.751047426219423e-06, "loss": 0.3024, "step": 4014 }, { "epoch": 1.86715237947605, "grad_norm": 0.41392412781715393, "learning_rate": 3.7484274508860776e-06, "loss": 0.3458, "step": 4015 }, { "epoch": 1.8676174236552474, "grad_norm": 0.46794068813323975, "learning_rate": 3.745807842118e-06, "loss": 0.3345, "step": 4016 }, { "epoch": 1.8680824678344443, "grad_norm": 0.400758296251297, "learning_rate": 3.7431886006824347e-06, "loss": 0.339, "step": 4017 }, { "epoch": 1.8685475120136412, "grad_norm": 0.3659023642539978, "learning_rate": 3.7405697273465125e-06, "loss": 0.3756, "step": 4018 }, { "epoch": 1.8690125561928383, "grad_norm": 0.4087202548980713, "learning_rate": 3.7379512228772618e-06, "loss": 0.3622, "step": 4019 }, { "epoch": 1.8694776003720355, "grad_norm": 0.3660109341144562, "learning_rate": 3.7353330880415963e-06, "loss": 0.3189, "step": 4020 }, { "epoch": 1.8699426445512324, "grad_norm": 0.3783299922943115, "learning_rate": 3.7327153236063295e-06, "loss": 0.3218, "step": 4021 }, { "epoch": 1.8704076887304293, "grad_norm": 0.3811236023902893, "learning_rate": 3.7300979303381576e-06, "loss": 0.3672, "step": 4022 }, { "epoch": 1.8708727329096264, "grad_norm": 0.3874422013759613, "learning_rate": 3.7274809090036757e-06, "loss": 0.377, "step": 4023 }, { "epoch": 1.8713377770888235, "grad_norm": 0.40507352352142334, "learning_rate": 3.724864260369364e-06, "loss": 0.3598, "step": 4024 }, { "epoch": 1.8718028212680204, "grad_norm": 0.3792964220046997, "learning_rate": 3.7222479852016015e-06, "loss": 0.3304, "step": 4025 }, { "epoch": 1.8722678654472173, "grad_norm": 0.3633670210838318, "learning_rate": 3.7196320842666467e-06, "loss": 0.2997, "step": 4026 }, { "epoch": 1.8727329096264145, "grad_norm": 0.3763364851474762, "learning_rate": 3.7170165583306595e-06, "loss": 0.3596, "step": 4027 }, { "epoch": 1.8731979538056116, "grad_norm": 0.3550674617290497, "learning_rate": 3.71440140815968e-06, "loss": 0.3433, "step": 4028 }, { "epoch": 1.8736629979848085, "grad_norm": 0.3978072702884674, "learning_rate": 3.7117866345196473e-06, "loss": 0.3898, "step": 4029 }, { "epoch": 1.8741280421640054, "grad_norm": 0.417102575302124, "learning_rate": 3.709172238176384e-06, "loss": 0.337, "step": 4030 }, { "epoch": 1.8745930863432028, "grad_norm": 0.36194002628326416, "learning_rate": 3.706558219895607e-06, "loss": 0.3347, "step": 4031 }, { "epoch": 1.8750581305223997, "grad_norm": 0.39445623755455017, "learning_rate": 3.7039445804429154e-06, "loss": 0.3578, "step": 4032 }, { "epoch": 1.8755231747015966, "grad_norm": 0.3856761157512665, "learning_rate": 3.7013313205838066e-06, "loss": 0.3493, "step": 4033 }, { "epoch": 1.8759882188807937, "grad_norm": 0.3411782383918762, "learning_rate": 3.698718441083657e-06, "loss": 0.3297, "step": 4034 }, { "epoch": 1.8764532630599908, "grad_norm": 0.421271950006485, "learning_rate": 3.6961059427077407e-06, "loss": 0.3634, "step": 4035 }, { "epoch": 1.8769183072391877, "grad_norm": 0.4099300801753998, "learning_rate": 3.693493826221215e-06, "loss": 0.3815, "step": 4036 }, { "epoch": 1.8773833514183846, "grad_norm": 0.37041401863098145, "learning_rate": 3.6908820923891235e-06, "loss": 0.3621, "step": 4037 }, { "epoch": 1.8778483955975818, "grad_norm": 0.36186453700065613, "learning_rate": 3.6882707419764053e-06, "loss": 0.3328, "step": 4038 }, { "epoch": 1.878313439776779, "grad_norm": 0.38483038544654846, "learning_rate": 3.6856597757478784e-06, "loss": 0.3676, "step": 4039 }, { "epoch": 1.8787784839559758, "grad_norm": 0.3237282335758209, "learning_rate": 3.6830491944682543e-06, "loss": 0.3129, "step": 4040 }, { "epoch": 1.8792435281351727, "grad_norm": 0.3765997588634491, "learning_rate": 3.6804389989021292e-06, "loss": 0.3639, "step": 4041 }, { "epoch": 1.8797085723143698, "grad_norm": 0.3259957432746887, "learning_rate": 3.6778291898139907e-06, "loss": 0.3183, "step": 4042 }, { "epoch": 1.880173616493567, "grad_norm": 0.37962499260902405, "learning_rate": 3.675219767968203e-06, "loss": 0.3432, "step": 4043 }, { "epoch": 1.8806386606727639, "grad_norm": 0.34864094853401184, "learning_rate": 3.6726107341290285e-06, "loss": 0.3045, "step": 4044 }, { "epoch": 1.881103704851961, "grad_norm": 0.3533942997455597, "learning_rate": 3.6700020890606068e-06, "loss": 0.3542, "step": 4045 }, { "epoch": 1.8815687490311581, "grad_norm": 0.351633220911026, "learning_rate": 3.667393833526972e-06, "loss": 0.3474, "step": 4046 }, { "epoch": 1.882033793210355, "grad_norm": 0.3609406650066376, "learning_rate": 3.664785968292036e-06, "loss": 0.3546, "step": 4047 }, { "epoch": 1.882498837389552, "grad_norm": 0.3869522511959076, "learning_rate": 3.6621784941196036e-06, "loss": 0.3715, "step": 4048 }, { "epoch": 1.882963881568749, "grad_norm": 0.36395883560180664, "learning_rate": 3.6595714117733583e-06, "loss": 0.3602, "step": 4049 }, { "epoch": 1.8834289257479462, "grad_norm": 0.36276188492774963, "learning_rate": 3.656964722016875e-06, "loss": 0.3046, "step": 4050 }, { "epoch": 1.883893969927143, "grad_norm": 0.3588871955871582, "learning_rate": 3.6543584256136076e-06, "loss": 0.3384, "step": 4051 }, { "epoch": 1.88435901410634, "grad_norm": 0.39765575528144836, "learning_rate": 3.6517525233269015e-06, "loss": 0.3879, "step": 4052 }, { "epoch": 1.8848240582855371, "grad_norm": 0.34334567189216614, "learning_rate": 3.6491470159199806e-06, "loss": 0.316, "step": 4053 }, { "epoch": 1.8852891024647342, "grad_norm": 0.3806763291358948, "learning_rate": 3.646541904155958e-06, "loss": 0.3474, "step": 4054 }, { "epoch": 1.8857541466439312, "grad_norm": 0.3665696978569031, "learning_rate": 3.643937188797826e-06, "loss": 0.3653, "step": 4055 }, { "epoch": 1.886219190823128, "grad_norm": 0.3573841154575348, "learning_rate": 3.641332870608466e-06, "loss": 0.3426, "step": 4056 }, { "epoch": 1.8866842350023252, "grad_norm": 0.3744082450866699, "learning_rate": 3.6387289503506375e-06, "loss": 0.3397, "step": 4057 }, { "epoch": 1.8871492791815223, "grad_norm": 0.37690484523773193, "learning_rate": 3.6361254287869886e-06, "loss": 0.3628, "step": 4058 }, { "epoch": 1.8876143233607192, "grad_norm": 0.34452709555625916, "learning_rate": 3.6335223066800466e-06, "loss": 0.3244, "step": 4059 }, { "epoch": 1.8880793675399163, "grad_norm": 0.4054635167121887, "learning_rate": 3.6309195847922284e-06, "loss": 0.3616, "step": 4060 }, { "epoch": 1.8885444117191135, "grad_norm": 0.40721654891967773, "learning_rate": 3.628317263885823e-06, "loss": 0.3951, "step": 4061 }, { "epoch": 1.8890094558983104, "grad_norm": 0.3469128906726837, "learning_rate": 3.625715344723012e-06, "loss": 0.2801, "step": 4062 }, { "epoch": 1.8894745000775073, "grad_norm": 0.35594481229782104, "learning_rate": 3.623113828065853e-06, "loss": 0.3517, "step": 4063 }, { "epoch": 1.8899395442567044, "grad_norm": 0.3937917947769165, "learning_rate": 3.6205127146762885e-06, "loss": 0.3679, "step": 4064 }, { "epoch": 1.8904045884359015, "grad_norm": 0.3829907476902008, "learning_rate": 3.617912005316142e-06, "loss": 0.3391, "step": 4065 }, { "epoch": 1.8908696326150984, "grad_norm": 0.36475563049316406, "learning_rate": 3.615311700747122e-06, "loss": 0.335, "step": 4066 }, { "epoch": 1.8913346767942953, "grad_norm": 0.3677665591239929, "learning_rate": 3.6127118017308116e-06, "loss": 0.3373, "step": 4067 }, { "epoch": 1.8917997209734925, "grad_norm": 0.3832724094390869, "learning_rate": 3.6101123090286814e-06, "loss": 0.3258, "step": 4068 }, { "epoch": 1.8922647651526896, "grad_norm": 0.3629043698310852, "learning_rate": 3.607513223402078e-06, "loss": 0.3766, "step": 4069 }, { "epoch": 1.8927298093318865, "grad_norm": 0.3398493826389313, "learning_rate": 3.6049145456122347e-06, "loss": 0.3425, "step": 4070 }, { "epoch": 1.8931948535110834, "grad_norm": 0.36889976263046265, "learning_rate": 3.6023162764202613e-06, "loss": 0.3559, "step": 4071 }, { "epoch": 1.8936598976902805, "grad_norm": 0.393019437789917, "learning_rate": 3.599718416587146e-06, "loss": 0.3255, "step": 4072 }, { "epoch": 1.8941249418694777, "grad_norm": 0.401734322309494, "learning_rate": 3.5971209668737626e-06, "loss": 0.3756, "step": 4073 }, { "epoch": 1.8945899860486746, "grad_norm": 0.34484103322029114, "learning_rate": 3.5945239280408596e-06, "loss": 0.321, "step": 4074 }, { "epoch": 1.8950550302278717, "grad_norm": 0.41480597853660583, "learning_rate": 3.591927300849069e-06, "loss": 0.3551, "step": 4075 }, { "epoch": 1.8955200744070688, "grad_norm": 0.3733346462249756, "learning_rate": 3.5893310860588997e-06, "loss": 0.3334, "step": 4076 }, { "epoch": 1.8959851185862657, "grad_norm": 0.3196105360984802, "learning_rate": 3.5867352844307433e-06, "loss": 0.3302, "step": 4077 }, { "epoch": 1.8964501627654626, "grad_norm": 0.3849073052406311, "learning_rate": 3.5841398967248654e-06, "loss": 0.3548, "step": 4078 }, { "epoch": 1.8969152069446598, "grad_norm": 0.38783520460128784, "learning_rate": 3.5815449237014144e-06, "loss": 0.3313, "step": 4079 }, { "epoch": 1.8973802511238569, "grad_norm": 0.3971535563468933, "learning_rate": 3.578950366120414e-06, "loss": 0.3581, "step": 4080 }, { "epoch": 1.8978452953030538, "grad_norm": 0.33792009949684143, "learning_rate": 3.5763562247417694e-06, "loss": 0.3269, "step": 4081 }, { "epoch": 1.8983103394822507, "grad_norm": 0.36532509326934814, "learning_rate": 3.5737625003252606e-06, "loss": 0.3342, "step": 4082 }, { "epoch": 1.8987753836614478, "grad_norm": 0.36312729120254517, "learning_rate": 3.5711691936305522e-06, "loss": 0.3322, "step": 4083 }, { "epoch": 1.899240427840645, "grad_norm": 0.36655622720718384, "learning_rate": 3.568576305417175e-06, "loss": 0.3621, "step": 4084 }, { "epoch": 1.8997054720198419, "grad_norm": 0.38222381472587585, "learning_rate": 3.5659838364445505e-06, "loss": 0.3598, "step": 4085 }, { "epoch": 1.9001705161990388, "grad_norm": 0.356871098279953, "learning_rate": 3.5633917874719642e-06, "loss": 0.3531, "step": 4086 }, { "epoch": 1.9006355603782359, "grad_norm": 0.34935128688812256, "learning_rate": 3.5608001592585895e-06, "loss": 0.3085, "step": 4087 }, { "epoch": 1.901100604557433, "grad_norm": 0.3709997236728668, "learning_rate": 3.55820895256347e-06, "loss": 0.3215, "step": 4088 }, { "epoch": 1.90156564873663, "grad_norm": 0.39799949526786804, "learning_rate": 3.5556181681455314e-06, "loss": 0.3538, "step": 4089 }, { "epoch": 1.902030692915827, "grad_norm": 0.35450243949890137, "learning_rate": 3.553027806763568e-06, "loss": 0.3455, "step": 4090 }, { "epoch": 1.9024957370950242, "grad_norm": 0.43028944730758667, "learning_rate": 3.5504378691762586e-06, "loss": 0.3607, "step": 4091 }, { "epoch": 1.902960781274221, "grad_norm": 0.35662081837654114, "learning_rate": 3.5478483561421497e-06, "loss": 0.312, "step": 4092 }, { "epoch": 1.903425825453418, "grad_norm": 0.38295355439186096, "learning_rate": 3.5452592684196707e-06, "loss": 0.3484, "step": 4093 }, { "epoch": 1.9038908696326151, "grad_norm": 0.38135382533073425, "learning_rate": 3.542670606767121e-06, "loss": 0.3646, "step": 4094 }, { "epoch": 1.9043559138118122, "grad_norm": 0.3967789113521576, "learning_rate": 3.540082371942682e-06, "loss": 0.3255, "step": 4095 }, { "epoch": 1.9048209579910091, "grad_norm": 0.4134833514690399, "learning_rate": 3.5374945647044e-06, "loss": 0.3487, "step": 4096 }, { "epoch": 1.905286002170206, "grad_norm": 0.36041533946990967, "learning_rate": 3.5349071858102056e-06, "loss": 0.3114, "step": 4097 }, { "epoch": 1.9057510463494032, "grad_norm": 0.40801483392715454, "learning_rate": 3.5323202360178976e-06, "loss": 0.3595, "step": 4098 }, { "epoch": 1.9062160905286003, "grad_norm": 0.39261582493782043, "learning_rate": 3.529733716085154e-06, "loss": 0.3617, "step": 4099 }, { "epoch": 1.9066811347077972, "grad_norm": 0.3464079797267914, "learning_rate": 3.5271476267695216e-06, "loss": 0.3262, "step": 4100 }, { "epoch": 1.9071461788869941, "grad_norm": 0.461972177028656, "learning_rate": 3.5245619688284277e-06, "loss": 0.3115, "step": 4101 }, { "epoch": 1.9076112230661912, "grad_norm": 0.35122260451316833, "learning_rate": 3.5219767430191653e-06, "loss": 0.3377, "step": 4102 }, { "epoch": 1.9080762672453884, "grad_norm": 0.4606049060821533, "learning_rate": 3.5193919500989093e-06, "loss": 0.3413, "step": 4103 }, { "epoch": 1.9085413114245853, "grad_norm": 0.40791839361190796, "learning_rate": 3.516807590824699e-06, "loss": 0.3605, "step": 4104 }, { "epoch": 1.9090063556037824, "grad_norm": 0.3770095407962799, "learning_rate": 3.514223665953455e-06, "loss": 0.3335, "step": 4105 }, { "epoch": 1.9094713997829795, "grad_norm": 0.3994191884994507, "learning_rate": 3.5116401762419643e-06, "loss": 0.2999, "step": 4106 }, { "epoch": 1.9099364439621764, "grad_norm": 0.4128780663013458, "learning_rate": 3.509057122446893e-06, "loss": 0.3623, "step": 4107 }, { "epoch": 1.9104014881413733, "grad_norm": 0.39665135741233826, "learning_rate": 3.506474505324772e-06, "loss": 0.3586, "step": 4108 }, { "epoch": 1.9108665323205705, "grad_norm": 0.3825541138648987, "learning_rate": 3.503892325632007e-06, "loss": 0.3502, "step": 4109 }, { "epoch": 1.9113315764997676, "grad_norm": 0.3846040368080139, "learning_rate": 3.5013105841248794e-06, "loss": 0.3397, "step": 4110 }, { "epoch": 1.9117966206789645, "grad_norm": 0.4117841422557831, "learning_rate": 3.4987292815595376e-06, "loss": 0.3676, "step": 4111 }, { "epoch": 1.9122616648581614, "grad_norm": 0.38701215386390686, "learning_rate": 3.496148418692006e-06, "loss": 0.3433, "step": 4112 }, { "epoch": 1.9127267090373585, "grad_norm": 0.3598323464393616, "learning_rate": 3.4935679962781722e-06, "loss": 0.346, "step": 4113 }, { "epoch": 1.9131917532165557, "grad_norm": 0.40813809633255005, "learning_rate": 3.4909880150738057e-06, "loss": 0.3356, "step": 4114 }, { "epoch": 1.9136567973957526, "grad_norm": 0.3636961579322815, "learning_rate": 3.4884084758345365e-06, "loss": 0.3225, "step": 4115 }, { "epoch": 1.9141218415749495, "grad_norm": 0.3554372489452362, "learning_rate": 3.4858293793158727e-06, "loss": 0.3349, "step": 4116 }, { "epoch": 1.9145868857541466, "grad_norm": 0.4285871684551239, "learning_rate": 3.4832507262731876e-06, "loss": 0.3442, "step": 4117 }, { "epoch": 1.9150519299333437, "grad_norm": 0.38151922821998596, "learning_rate": 3.4806725174617305e-06, "loss": 0.3202, "step": 4118 }, { "epoch": 1.9155169741125406, "grad_norm": 0.4228741228580475, "learning_rate": 3.4780947536366115e-06, "loss": 0.3516, "step": 4119 }, { "epoch": 1.9159820182917378, "grad_norm": 0.3742198944091797, "learning_rate": 3.4755174355528214e-06, "loss": 0.3673, "step": 4120 }, { "epoch": 1.9164470624709349, "grad_norm": 0.3714161217212677, "learning_rate": 3.4729405639652102e-06, "loss": 0.3152, "step": 4121 }, { "epoch": 1.9169121066501318, "grad_norm": 0.3599150478839874, "learning_rate": 3.470364139628504e-06, "loss": 0.3102, "step": 4122 }, { "epoch": 1.9173771508293287, "grad_norm": 0.4052508771419525, "learning_rate": 3.467788163297294e-06, "loss": 0.3409, "step": 4123 }, { "epoch": 1.9178421950085258, "grad_norm": 0.39652350544929504, "learning_rate": 3.465212635726045e-06, "loss": 0.4028, "step": 4124 }, { "epoch": 1.918307239187723, "grad_norm": 0.3475923240184784, "learning_rate": 3.462637557669084e-06, "loss": 0.3194, "step": 4125 }, { "epoch": 1.9187722833669199, "grad_norm": 0.35219481587409973, "learning_rate": 3.460062929880612e-06, "loss": 0.3094, "step": 4126 }, { "epoch": 1.9192373275461168, "grad_norm": 0.3954284191131592, "learning_rate": 3.4574887531146926e-06, "loss": 0.3428, "step": 4127 }, { "epoch": 1.9197023717253139, "grad_norm": 0.4456411600112915, "learning_rate": 3.4549150281252635e-06, "loss": 0.3701, "step": 4128 }, { "epoch": 1.920167415904511, "grad_norm": 0.3940032720565796, "learning_rate": 3.4523417556661244e-06, "loss": 0.3499, "step": 4129 }, { "epoch": 1.920632460083708, "grad_norm": 0.36356043815612793, "learning_rate": 3.4497689364909483e-06, "loss": 0.362, "step": 4130 }, { "epoch": 1.9210975042629048, "grad_norm": 0.34411749243736267, "learning_rate": 3.4471965713532675e-06, "loss": 0.3274, "step": 4131 }, { "epoch": 1.921562548442102, "grad_norm": 0.36001211404800415, "learning_rate": 3.444624661006491e-06, "loss": 0.3625, "step": 4132 }, { "epoch": 1.922027592621299, "grad_norm": 0.3642420172691345, "learning_rate": 3.4420532062038846e-06, "loss": 0.3257, "step": 4133 }, { "epoch": 1.922492636800496, "grad_norm": 0.3693174719810486, "learning_rate": 3.43948220769859e-06, "loss": 0.3414, "step": 4134 }, { "epoch": 1.922957680979693, "grad_norm": 0.4484702944755554, "learning_rate": 3.4369116662436074e-06, "loss": 0.3864, "step": 4135 }, { "epoch": 1.9234227251588902, "grad_norm": 0.3540983200073242, "learning_rate": 3.4343415825918102e-06, "loss": 0.3052, "step": 4136 }, { "epoch": 1.9238877693380871, "grad_norm": 0.35332098603248596, "learning_rate": 3.4317719574959307e-06, "loss": 0.3524, "step": 4137 }, { "epoch": 1.924352813517284, "grad_norm": 0.41718167066574097, "learning_rate": 3.4292027917085733e-06, "loss": 0.3519, "step": 4138 }, { "epoch": 1.9248178576964812, "grad_norm": 0.42364948987960815, "learning_rate": 3.4266340859822023e-06, "loss": 0.3231, "step": 4139 }, { "epoch": 1.9252829018756783, "grad_norm": 0.38962802290916443, "learning_rate": 3.424065841069152e-06, "loss": 0.3384, "step": 4140 }, { "epoch": 1.9257479460548752, "grad_norm": 0.37895727157592773, "learning_rate": 3.421498057721617e-06, "loss": 0.3477, "step": 4141 }, { "epoch": 1.926212990234072, "grad_norm": 0.40839117765426636, "learning_rate": 3.4189307366916635e-06, "loss": 0.3364, "step": 4142 }, { "epoch": 1.9266780344132692, "grad_norm": 0.39300408959388733, "learning_rate": 3.4163638787312152e-06, "loss": 0.3477, "step": 4143 }, { "epoch": 1.9271430785924664, "grad_norm": 0.39907246828079224, "learning_rate": 3.4137974845920616e-06, "loss": 0.3376, "step": 4144 }, { "epoch": 1.9276081227716633, "grad_norm": 0.38821595907211304, "learning_rate": 3.411231555025861e-06, "loss": 0.3703, "step": 4145 }, { "epoch": 1.9280731669508602, "grad_norm": 0.3863977789878845, "learning_rate": 3.4086660907841307e-06, "loss": 0.3541, "step": 4146 }, { "epoch": 1.9285382111300573, "grad_norm": 0.36651626229286194, "learning_rate": 3.4061010926182557e-06, "loss": 0.3067, "step": 4147 }, { "epoch": 1.9290032553092544, "grad_norm": 0.4503706693649292, "learning_rate": 3.403536561279479e-06, "loss": 0.3646, "step": 4148 }, { "epoch": 1.9294682994884513, "grad_norm": 0.417574942111969, "learning_rate": 3.400972497518914e-06, "loss": 0.3605, "step": 4149 }, { "epoch": 1.9299333436676485, "grad_norm": 0.3759341239929199, "learning_rate": 3.398408902087529e-06, "loss": 0.3452, "step": 4150 }, { "epoch": 1.9303983878468456, "grad_norm": 0.36154696345329285, "learning_rate": 3.395845775736163e-06, "loss": 0.3387, "step": 4151 }, { "epoch": 1.9308634320260425, "grad_norm": 0.35659950971603394, "learning_rate": 3.3932831192155115e-06, "loss": 0.331, "step": 4152 }, { "epoch": 1.9313284762052394, "grad_norm": 0.36559152603149414, "learning_rate": 3.3907209332761383e-06, "loss": 0.3622, "step": 4153 }, { "epoch": 1.9317935203844365, "grad_norm": 0.365607887506485, "learning_rate": 3.3881592186684616e-06, "loss": 0.3181, "step": 4154 }, { "epoch": 1.9322585645636337, "grad_norm": 0.3874776065349579, "learning_rate": 3.3855979761427705e-06, "loss": 0.3239, "step": 4155 }, { "epoch": 1.9327236087428306, "grad_norm": 0.3716128170490265, "learning_rate": 3.383037206449207e-06, "loss": 0.3572, "step": 4156 }, { "epoch": 1.9331886529220275, "grad_norm": 0.38972359895706177, "learning_rate": 3.3804769103377827e-06, "loss": 0.3569, "step": 4157 }, { "epoch": 1.9336536971012246, "grad_norm": 0.3612194061279297, "learning_rate": 3.377917088558364e-06, "loss": 0.3391, "step": 4158 }, { "epoch": 1.9341187412804217, "grad_norm": 0.3686378002166748, "learning_rate": 3.3753577418606844e-06, "loss": 0.3359, "step": 4159 }, { "epoch": 1.9345837854596186, "grad_norm": 0.35202595591545105, "learning_rate": 3.3727988709943303e-06, "loss": 0.3094, "step": 4160 }, { "epoch": 1.9350488296388155, "grad_norm": 0.3890860676765442, "learning_rate": 3.370240476708759e-06, "loss": 0.346, "step": 4161 }, { "epoch": 1.9355138738180129, "grad_norm": 0.3682956397533417, "learning_rate": 3.367682559753277e-06, "loss": 0.3363, "step": 4162 }, { "epoch": 1.9359789179972098, "grad_norm": 0.37430447340011597, "learning_rate": 3.36512512087706e-06, "loss": 0.335, "step": 4163 }, { "epoch": 1.9364439621764067, "grad_norm": 0.371165007352829, "learning_rate": 3.3625681608291393e-06, "loss": 0.3534, "step": 4164 }, { "epoch": 1.9369090063556038, "grad_norm": 0.3415764570236206, "learning_rate": 3.360011680358409e-06, "loss": 0.3183, "step": 4165 }, { "epoch": 1.937374050534801, "grad_norm": 0.41029369831085205, "learning_rate": 3.3574556802136164e-06, "loss": 0.3807, "step": 4166 }, { "epoch": 1.9378390947139978, "grad_norm": 0.41946086287498474, "learning_rate": 3.354900161143377e-06, "loss": 0.3535, "step": 4167 }, { "epoch": 1.9383041388931948, "grad_norm": 0.36154383420944214, "learning_rate": 3.352345123896158e-06, "loss": 0.3153, "step": 4168 }, { "epoch": 1.9387691830723919, "grad_norm": 0.3600989878177643, "learning_rate": 3.3497905692202892e-06, "loss": 0.3591, "step": 4169 }, { "epoch": 1.939234227251589, "grad_norm": 0.37105801701545715, "learning_rate": 3.347236497863957e-06, "loss": 0.3842, "step": 4170 }, { "epoch": 1.939699271430786, "grad_norm": 0.32860368490219116, "learning_rate": 3.3446829105752103e-06, "loss": 0.289, "step": 4171 }, { "epoch": 1.9401643156099828, "grad_norm": 0.3648563325405121, "learning_rate": 3.34212980810195e-06, "loss": 0.3297, "step": 4172 }, { "epoch": 1.94062935978918, "grad_norm": 0.3792143166065216, "learning_rate": 3.3395771911919416e-06, "loss": 0.3211, "step": 4173 }, { "epoch": 1.941094403968377, "grad_norm": 0.4156646430492401, "learning_rate": 3.3370250605928013e-06, "loss": 0.3774, "step": 4174 }, { "epoch": 1.941559448147574, "grad_norm": 0.3592356741428375, "learning_rate": 3.33447341705201e-06, "loss": 0.3604, "step": 4175 }, { "epoch": 1.9420244923267709, "grad_norm": 0.4310251474380493, "learning_rate": 3.3319222613169e-06, "loss": 0.3351, "step": 4176 }, { "epoch": 1.9424895365059682, "grad_norm": 0.40586546063423157, "learning_rate": 3.3293715941346676e-06, "loss": 0.312, "step": 4177 }, { "epoch": 1.9429545806851651, "grad_norm": 0.374381422996521, "learning_rate": 3.3268214162523563e-06, "loss": 0.3451, "step": 4178 }, { "epoch": 1.943419624864362, "grad_norm": 0.3669213056564331, "learning_rate": 3.324271728416877e-06, "loss": 0.3387, "step": 4179 }, { "epoch": 1.9438846690435592, "grad_norm": 0.39169323444366455, "learning_rate": 3.321722531374988e-06, "loss": 0.3568, "step": 4180 }, { "epoch": 1.9443497132227563, "grad_norm": 0.3700978457927704, "learning_rate": 3.3191738258733085e-06, "loss": 0.3265, "step": 4181 }, { "epoch": 1.9448147574019532, "grad_norm": 0.46921029686927795, "learning_rate": 3.316625612658315e-06, "loss": 0.3883, "step": 4182 }, { "epoch": 1.94527980158115, "grad_norm": 0.38272857666015625, "learning_rate": 3.314077892476334e-06, "loss": 0.3328, "step": 4183 }, { "epoch": 1.9457448457603472, "grad_norm": 0.40746691823005676, "learning_rate": 3.3115306660735564e-06, "loss": 0.3558, "step": 4184 }, { "epoch": 1.9462098899395444, "grad_norm": 0.33452197909355164, "learning_rate": 3.308983934196018e-06, "loss": 0.3091, "step": 4185 }, { "epoch": 1.9466749341187413, "grad_norm": 0.4103114902973175, "learning_rate": 3.3064376975896197e-06, "loss": 0.3777, "step": 4186 }, { "epoch": 1.9471399782979382, "grad_norm": 0.3644181489944458, "learning_rate": 3.3038919570001086e-06, "loss": 0.3238, "step": 4187 }, { "epoch": 1.9476050224771353, "grad_norm": 0.42216435074806213, "learning_rate": 3.301346713173096e-06, "loss": 0.3551, "step": 4188 }, { "epoch": 1.9480700666563324, "grad_norm": 0.39056453108787537, "learning_rate": 3.2988019668540373e-06, "loss": 0.3612, "step": 4189 }, { "epoch": 1.9485351108355293, "grad_norm": 0.37854042649269104, "learning_rate": 3.2962577187882517e-06, "loss": 0.3339, "step": 4190 }, { "epoch": 1.9490001550147265, "grad_norm": 0.34841084480285645, "learning_rate": 3.2937139697209043e-06, "loss": 0.305, "step": 4191 }, { "epoch": 1.9494651991939236, "grad_norm": 0.4083377718925476, "learning_rate": 3.2911707203970213e-06, "loss": 0.3892, "step": 4192 }, { "epoch": 1.9499302433731205, "grad_norm": 0.3734639585018158, "learning_rate": 3.2886279715614754e-06, "loss": 0.327, "step": 4193 }, { "epoch": 1.9503952875523174, "grad_norm": 0.34090444445610046, "learning_rate": 3.286085723959001e-06, "loss": 0.3013, "step": 4194 }, { "epoch": 1.9508603317315145, "grad_norm": 0.42237943410873413, "learning_rate": 3.283543978334177e-06, "loss": 0.3566, "step": 4195 }, { "epoch": 1.9513253759107116, "grad_norm": 0.36327943205833435, "learning_rate": 3.281002735431442e-06, "loss": 0.3294, "step": 4196 }, { "epoch": 1.9517904200899086, "grad_norm": 0.3818821310997009, "learning_rate": 3.2784619959950832e-06, "loss": 0.3715, "step": 4197 }, { "epoch": 1.9522554642691055, "grad_norm": 0.3964533805847168, "learning_rate": 3.2759217607692427e-06, "loss": 0.3286, "step": 4198 }, { "epoch": 1.9527205084483026, "grad_norm": 0.404161661863327, "learning_rate": 3.2733820304979136e-06, "loss": 0.3412, "step": 4199 }, { "epoch": 1.9531855526274997, "grad_norm": 0.397344708442688, "learning_rate": 3.2708428059249437e-06, "loss": 0.3277, "step": 4200 }, { "epoch": 1.9536505968066966, "grad_norm": 0.3819953501224518, "learning_rate": 3.268304087794027e-06, "loss": 0.325, "step": 4201 }, { "epoch": 1.9541156409858935, "grad_norm": 0.40046122670173645, "learning_rate": 3.2657658768487164e-06, "loss": 0.3437, "step": 4202 }, { "epoch": 1.9545806851650906, "grad_norm": 0.3873307704925537, "learning_rate": 3.26322817383241e-06, "loss": 0.3454, "step": 4203 }, { "epoch": 1.9550457293442878, "grad_norm": 0.3931639790534973, "learning_rate": 3.260690979488361e-06, "loss": 0.3232, "step": 4204 }, { "epoch": 1.9555107735234847, "grad_norm": 0.48349693417549133, "learning_rate": 3.258154294559671e-06, "loss": 0.4106, "step": 4205 }, { "epoch": 1.9559758177026818, "grad_norm": 0.3327430486679077, "learning_rate": 3.255618119789298e-06, "loss": 0.305, "step": 4206 }, { "epoch": 1.956440861881879, "grad_norm": 0.4101545512676239, "learning_rate": 3.2530824559200415e-06, "loss": 0.3245, "step": 4207 }, { "epoch": 1.9569059060610758, "grad_norm": 0.4183330833911896, "learning_rate": 3.2505473036945588e-06, "loss": 0.3395, "step": 4208 }, { "epoch": 1.9573709502402727, "grad_norm": 0.463460236787796, "learning_rate": 3.2480126638553533e-06, "loss": 0.3707, "step": 4209 }, { "epoch": 1.9578359944194699, "grad_norm": 0.3990997076034546, "learning_rate": 3.2454785371447817e-06, "loss": 0.3211, "step": 4210 }, { "epoch": 1.958301038598667, "grad_norm": 0.38769689202308655, "learning_rate": 3.2429449243050464e-06, "loss": 0.3219, "step": 4211 }, { "epoch": 1.958766082777864, "grad_norm": 0.3971726596355438, "learning_rate": 3.2404118260782047e-06, "loss": 0.3451, "step": 4212 }, { "epoch": 1.9592311269570608, "grad_norm": 0.3939190208911896, "learning_rate": 3.2378792432061557e-06, "loss": 0.3454, "step": 4213 }, { "epoch": 1.959696171136258, "grad_norm": 0.3693125247955322, "learning_rate": 3.2353471764306567e-06, "loss": 0.3299, "step": 4214 }, { "epoch": 1.960161215315455, "grad_norm": 0.42611974477767944, "learning_rate": 3.2328156264933043e-06, "loss": 0.3396, "step": 4215 }, { "epoch": 1.960626259494652, "grad_norm": 0.4172080159187317, "learning_rate": 3.23028459413555e-06, "loss": 0.3579, "step": 4216 }, { "epoch": 1.9610913036738489, "grad_norm": 0.4222067594528198, "learning_rate": 3.227754080098694e-06, "loss": 0.3634, "step": 4217 }, { "epoch": 1.961556347853046, "grad_norm": 0.3900541365146637, "learning_rate": 3.2252240851238786e-06, "loss": 0.3096, "step": 4218 }, { "epoch": 1.9620213920322431, "grad_norm": 0.3957304060459137, "learning_rate": 3.2226946099521026e-06, "loss": 0.3681, "step": 4219 }, { "epoch": 1.96248643621144, "grad_norm": 0.44212228059768677, "learning_rate": 3.2201656553242054e-06, "loss": 0.3601, "step": 4220 }, { "epoch": 1.9629514803906372, "grad_norm": 0.38398414850234985, "learning_rate": 3.217637221980878e-06, "loss": 0.308, "step": 4221 }, { "epoch": 1.9634165245698343, "grad_norm": 0.4060637056827545, "learning_rate": 3.215109310662656e-06, "loss": 0.342, "step": 4222 }, { "epoch": 1.9638815687490312, "grad_norm": 0.4322217106819153, "learning_rate": 3.2125819221099265e-06, "loss": 0.3304, "step": 4223 }, { "epoch": 1.964346612928228, "grad_norm": 0.37583181262016296, "learning_rate": 3.210055057062917e-06, "loss": 0.2955, "step": 4224 }, { "epoch": 1.9648116571074252, "grad_norm": 0.37987688183784485, "learning_rate": 3.2075287162617084e-06, "loss": 0.3506, "step": 4225 }, { "epoch": 1.9652767012866224, "grad_norm": 0.42965471744537354, "learning_rate": 3.2050029004462226e-06, "loss": 0.3463, "step": 4226 }, { "epoch": 1.9657417454658193, "grad_norm": 0.3723207712173462, "learning_rate": 3.2024776103562304e-06, "loss": 0.3088, "step": 4227 }, { "epoch": 1.9662067896450162, "grad_norm": 0.3601841628551483, "learning_rate": 3.199952846731349e-06, "loss": 0.3396, "step": 4228 }, { "epoch": 1.9666718338242133, "grad_norm": 0.45041221380233765, "learning_rate": 3.197428610311042e-06, "loss": 0.3399, "step": 4229 }, { "epoch": 1.9671368780034104, "grad_norm": 0.3874083459377289, "learning_rate": 3.194904901834613e-06, "loss": 0.3283, "step": 4230 }, { "epoch": 1.9676019221826073, "grad_norm": 0.3825784921646118, "learning_rate": 3.19238172204122e-06, "loss": 0.3291, "step": 4231 }, { "epoch": 1.9680669663618042, "grad_norm": 0.3952145278453827, "learning_rate": 3.1898590716698574e-06, "loss": 0.3482, "step": 4232 }, { "epoch": 1.9685320105410014, "grad_norm": 0.4280111789703369, "learning_rate": 3.1873369514593712e-06, "loss": 0.3488, "step": 4233 }, { "epoch": 1.9689970547201985, "grad_norm": 0.44719386100769043, "learning_rate": 3.184815362148448e-06, "loss": 0.3692, "step": 4234 }, { "epoch": 1.9694620988993954, "grad_norm": 0.4005783796310425, "learning_rate": 3.1822943044756222e-06, "loss": 0.3451, "step": 4235 }, { "epoch": 1.9699271430785925, "grad_norm": 0.3623179495334625, "learning_rate": 3.1797737791792672e-06, "loss": 0.3514, "step": 4236 }, { "epoch": 1.9703921872577896, "grad_norm": 0.4718107581138611, "learning_rate": 3.177253786997609e-06, "loss": 0.3607, "step": 4237 }, { "epoch": 1.9708572314369865, "grad_norm": 0.44240835309028625, "learning_rate": 3.1747343286687065e-06, "loss": 0.3568, "step": 4238 }, { "epoch": 1.9713222756161835, "grad_norm": 0.3883517384529114, "learning_rate": 3.1722154049304728e-06, "loss": 0.3407, "step": 4239 }, { "epoch": 1.9717873197953806, "grad_norm": 0.4095773994922638, "learning_rate": 3.1696970165206564e-06, "loss": 0.3302, "step": 4240 }, { "epoch": 1.9722523639745777, "grad_norm": 0.4610760509967804, "learning_rate": 3.167179164176857e-06, "loss": 0.3149, "step": 4241 }, { "epoch": 1.9727174081537746, "grad_norm": 0.4434893727302551, "learning_rate": 3.1646618486365068e-06, "loss": 0.3795, "step": 4242 }, { "epoch": 1.9731824523329715, "grad_norm": 0.3962785601615906, "learning_rate": 3.1621450706368904e-06, "loss": 0.3624, "step": 4243 }, { "epoch": 1.9736474965121686, "grad_norm": 0.3942055404186249, "learning_rate": 3.15962883091513e-06, "loss": 0.3408, "step": 4244 }, { "epoch": 1.9741125406913658, "grad_norm": 0.4068949520587921, "learning_rate": 3.1571131302081916e-06, "loss": 0.3692, "step": 4245 }, { "epoch": 1.9745775848705627, "grad_norm": 0.46068617701530457, "learning_rate": 3.154597969252883e-06, "loss": 0.3489, "step": 4246 }, { "epoch": 1.9750426290497596, "grad_norm": 0.47146105766296387, "learning_rate": 3.1520833487858547e-06, "loss": 0.3245, "step": 4247 }, { "epoch": 1.9755076732289567, "grad_norm": 0.3786758482456207, "learning_rate": 3.1495692695435966e-06, "loss": 0.3297, "step": 4248 }, { "epoch": 1.9759727174081538, "grad_norm": 0.4309166967868805, "learning_rate": 3.147055732262444e-06, "loss": 0.3595, "step": 4249 }, { "epoch": 1.9764377615873507, "grad_norm": 0.4204605519771576, "learning_rate": 3.1445427376785687e-06, "loss": 0.3285, "step": 4250 }, { "epoch": 1.9769028057665479, "grad_norm": 0.4231455624103546, "learning_rate": 3.142030286527987e-06, "loss": 0.3434, "step": 4251 }, { "epoch": 1.977367849945745, "grad_norm": 0.39683791995048523, "learning_rate": 3.1395183795465565e-06, "loss": 0.3404, "step": 4252 }, { "epoch": 1.977832894124942, "grad_norm": 0.43231719732284546, "learning_rate": 3.137007017469971e-06, "loss": 0.3534, "step": 4253 }, { "epoch": 1.9782979383041388, "grad_norm": 0.45599350333213806, "learning_rate": 3.1344962010337703e-06, "loss": 0.3811, "step": 4254 }, { "epoch": 1.978762982483336, "grad_norm": 0.4510321021080017, "learning_rate": 3.131985930973329e-06, "loss": 0.374, "step": 4255 }, { "epoch": 1.979228026662533, "grad_norm": 0.44332581758499146, "learning_rate": 3.1294762080238672e-06, "loss": 0.3416, "step": 4256 }, { "epoch": 1.97969307084173, "grad_norm": 0.34618374705314636, "learning_rate": 3.12696703292044e-06, "loss": 0.2877, "step": 4257 }, { "epoch": 1.9801581150209269, "grad_norm": 0.455598384141922, "learning_rate": 3.1244584063979467e-06, "loss": 0.3718, "step": 4258 }, { "epoch": 1.980623159200124, "grad_norm": 0.4085701107978821, "learning_rate": 3.121950329191119e-06, "loss": 0.3747, "step": 4259 }, { "epoch": 1.9810882033793211, "grad_norm": 0.3253307044506073, "learning_rate": 3.1194428020345375e-06, "loss": 0.3125, "step": 4260 }, { "epoch": 1.981553247558518, "grad_norm": 0.36619746685028076, "learning_rate": 3.11693582566261e-06, "loss": 0.343, "step": 4261 }, { "epoch": 1.982018291737715, "grad_norm": 0.47953325510025024, "learning_rate": 3.1144294008095942e-06, "loss": 0.3779, "step": 4262 }, { "epoch": 1.982483335916912, "grad_norm": 0.4037295877933502, "learning_rate": 3.111923528209577e-06, "loss": 0.3665, "step": 4263 }, { "epoch": 1.9829483800961092, "grad_norm": 0.376261442899704, "learning_rate": 3.1094182085964935e-06, "loss": 0.3332, "step": 4264 }, { "epoch": 1.983413424275306, "grad_norm": 0.4503709673881531, "learning_rate": 3.1069134427041047e-06, "loss": 0.3675, "step": 4265 }, { "epoch": 1.9838784684545032, "grad_norm": 0.4101594388484955, "learning_rate": 3.1044092312660213e-06, "loss": 0.3787, "step": 4266 }, { "epoch": 1.9843435126337003, "grad_norm": 0.45172303915023804, "learning_rate": 3.101905575015682e-06, "loss": 0.358, "step": 4267 }, { "epoch": 1.9848085568128973, "grad_norm": 0.3164076805114746, "learning_rate": 3.0994024746863692e-06, "loss": 0.3028, "step": 4268 }, { "epoch": 1.9852736009920942, "grad_norm": 0.3954414427280426, "learning_rate": 3.0968999310111993e-06, "loss": 0.3998, "step": 4269 }, { "epoch": 1.9857386451712913, "grad_norm": 0.40084972977638245, "learning_rate": 3.0943979447231287e-06, "loss": 0.3176, "step": 4270 }, { "epoch": 1.9862036893504884, "grad_norm": 0.4292057752609253, "learning_rate": 3.091896516554945e-06, "loss": 0.3524, "step": 4271 }, { "epoch": 1.9866687335296853, "grad_norm": 0.4974363148212433, "learning_rate": 3.0893956472392805e-06, "loss": 0.3649, "step": 4272 }, { "epoch": 1.9871337777088822, "grad_norm": 0.37316155433654785, "learning_rate": 3.086895337508594e-06, "loss": 0.3477, "step": 4273 }, { "epoch": 1.9875988218880793, "grad_norm": 0.3752647638320923, "learning_rate": 3.0843955880951906e-06, "loss": 0.3585, "step": 4274 }, { "epoch": 1.9880638660672765, "grad_norm": 0.3850919008255005, "learning_rate": 3.081896399731202e-06, "loss": 0.3594, "step": 4275 }, { "epoch": 1.9885289102464734, "grad_norm": 0.40935975313186646, "learning_rate": 3.0793977731486034e-06, "loss": 0.3427, "step": 4276 }, { "epoch": 1.9889939544256703, "grad_norm": 0.4816490411758423, "learning_rate": 3.0768997090791995e-06, "loss": 0.4007, "step": 4277 }, { "epoch": 1.9894589986048674, "grad_norm": 0.3938618302345276, "learning_rate": 3.0744022082546356e-06, "loss": 0.3432, "step": 4278 }, { "epoch": 1.9899240427840645, "grad_norm": 0.34527865052223206, "learning_rate": 3.071905271406384e-06, "loss": 0.3668, "step": 4279 }, { "epoch": 1.9903890869632614, "grad_norm": 0.3559629023075104, "learning_rate": 3.0694088992657617e-06, "loss": 0.3488, "step": 4280 }, { "epoch": 1.9908541311424586, "grad_norm": 0.4233744144439697, "learning_rate": 3.066913092563913e-06, "loss": 0.3593, "step": 4281 }, { "epoch": 1.9913191753216557, "grad_norm": 0.36039772629737854, "learning_rate": 3.064417852031822e-06, "loss": 0.3275, "step": 4282 }, { "epoch": 1.9917842195008526, "grad_norm": 0.41235482692718506, "learning_rate": 3.0619231784003e-06, "loss": 0.3885, "step": 4283 }, { "epoch": 1.9922492636800495, "grad_norm": 0.3795316219329834, "learning_rate": 3.059429072400001e-06, "loss": 0.3176, "step": 4284 }, { "epoch": 1.9927143078592466, "grad_norm": 0.4195452332496643, "learning_rate": 3.0569355347614033e-06, "loss": 0.3609, "step": 4285 }, { "epoch": 1.9931793520384438, "grad_norm": 0.32910481095314026, "learning_rate": 3.054442566214827e-06, "loss": 0.3151, "step": 4286 }, { "epoch": 1.9936443962176407, "grad_norm": 0.3915290832519531, "learning_rate": 3.051950167490422e-06, "loss": 0.3448, "step": 4287 }, { "epoch": 1.9941094403968376, "grad_norm": 0.39752107858657837, "learning_rate": 3.049458339318169e-06, "loss": 0.3286, "step": 4288 }, { "epoch": 1.9945744845760347, "grad_norm": 0.45507895946502686, "learning_rate": 3.0469670824278863e-06, "loss": 0.3607, "step": 4289 }, { "epoch": 1.9950395287552318, "grad_norm": 0.3670455515384674, "learning_rate": 3.044476397549221e-06, "loss": 0.332, "step": 4290 }, { "epoch": 1.9955045729344287, "grad_norm": 0.4452548623085022, "learning_rate": 3.0419862854116554e-06, "loss": 0.3882, "step": 4291 }, { "epoch": 1.9959696171136256, "grad_norm": 0.3838987946510315, "learning_rate": 3.0394967467445014e-06, "loss": 0.3492, "step": 4292 }, { "epoch": 1.9964346612928228, "grad_norm": 0.43697041273117065, "learning_rate": 3.0370077822769073e-06, "loss": 0.3688, "step": 4293 }, { "epoch": 1.99689970547202, "grad_norm": 0.38790035247802734, "learning_rate": 3.034519392737847e-06, "loss": 0.2995, "step": 4294 }, { "epoch": 1.9973647496512168, "grad_norm": 0.41673675179481506, "learning_rate": 3.0320315788561334e-06, "loss": 0.3068, "step": 4295 }, { "epoch": 1.997829793830414, "grad_norm": 0.5042394995689392, "learning_rate": 3.029544341360402e-06, "loss": 0.3846, "step": 4296 }, { "epoch": 1.998294838009611, "grad_norm": 0.3903370201587677, "learning_rate": 3.0270576809791273e-06, "loss": 0.3873, "step": 4297 }, { "epoch": 1.998759882188808, "grad_norm": 0.4303033947944641, "learning_rate": 3.02457159844061e-06, "loss": 0.3656, "step": 4298 }, { "epoch": 1.9992249263680049, "grad_norm": 0.44149455428123474, "learning_rate": 3.022086094472986e-06, "loss": 0.3233, "step": 4299 }, { "epoch": 1.999689970547202, "grad_norm": 0.38690122961997986, "learning_rate": 3.019601169804216e-06, "loss": 0.3404, "step": 4300 }, { "epoch": 2.000155014726399, "grad_norm": 0.7339352369308472, "learning_rate": 3.0171168251620974e-06, "loss": 0.5187, "step": 4301 }, { "epoch": 2.000620058905596, "grad_norm": 0.42907798290252686, "learning_rate": 3.01463306127425e-06, "loss": 0.3366, "step": 4302 }, { "epoch": 2.001085103084793, "grad_norm": 0.4182564616203308, "learning_rate": 3.012149878868132e-06, "loss": 0.2779, "step": 4303 }, { "epoch": 2.0015501472639903, "grad_norm": 0.37000566720962524, "learning_rate": 3.009667278671024e-06, "loss": 0.3469, "step": 4304 }, { "epoch": 2.002015191443187, "grad_norm": 0.36970627307891846, "learning_rate": 3.0071852614100427e-06, "loss": 0.2855, "step": 4305 }, { "epoch": 2.002480235622384, "grad_norm": 0.39976659417152405, "learning_rate": 3.004703827812128e-06, "loss": 0.3605, "step": 4306 }, { "epoch": 2.002945279801581, "grad_norm": 0.3608565032482147, "learning_rate": 3.0022229786040526e-06, "loss": 0.2864, "step": 4307 }, { "epoch": 2.0034103239807783, "grad_norm": 0.37464091181755066, "learning_rate": 2.999742714512415e-06, "loss": 0.3287, "step": 4308 }, { "epoch": 2.0038753681599752, "grad_norm": 0.37484025955200195, "learning_rate": 2.997263036263647e-06, "loss": 0.3256, "step": 4309 }, { "epoch": 2.004340412339172, "grad_norm": 0.34249240159988403, "learning_rate": 2.9947839445840045e-06, "loss": 0.2974, "step": 4310 }, { "epoch": 2.004805456518369, "grad_norm": 0.38859909772872925, "learning_rate": 2.9923054401995745e-06, "loss": 0.3534, "step": 4311 }, { "epoch": 2.0052705006975664, "grad_norm": 0.3693307936191559, "learning_rate": 2.9898275238362686e-06, "loss": 0.3355, "step": 4312 }, { "epoch": 2.0057355448767633, "grad_norm": 0.3429759442806244, "learning_rate": 2.98735019621983e-06, "loss": 0.2918, "step": 4313 }, { "epoch": 2.00620058905596, "grad_norm": 0.3832457959651947, "learning_rate": 2.984873458075827e-06, "loss": 0.3288, "step": 4314 }, { "epoch": 2.006665633235157, "grad_norm": 0.34353703260421753, "learning_rate": 2.9823973101296564e-06, "loss": 0.3088, "step": 4315 }, { "epoch": 2.0071306774143545, "grad_norm": 0.37068885564804077, "learning_rate": 2.9799217531065407e-06, "loss": 0.3203, "step": 4316 }, { "epoch": 2.0075957215935514, "grad_norm": 0.3725161850452423, "learning_rate": 2.977446787731532e-06, "loss": 0.3242, "step": 4317 }, { "epoch": 2.0080607657727483, "grad_norm": 0.36433523893356323, "learning_rate": 2.9749724147295054e-06, "loss": 0.3368, "step": 4318 }, { "epoch": 2.0085258099519456, "grad_norm": 0.4169025421142578, "learning_rate": 2.972498634825168e-06, "loss": 0.315, "step": 4319 }, { "epoch": 2.0089908541311425, "grad_norm": 0.41936197876930237, "learning_rate": 2.9700254487430448e-06, "loss": 0.3359, "step": 4320 }, { "epoch": 2.0094558983103394, "grad_norm": 0.3963291347026825, "learning_rate": 2.9675528572074953e-06, "loss": 0.3093, "step": 4321 }, { "epoch": 2.0099209424895363, "grad_norm": 0.38462764024734497, "learning_rate": 2.9650808609427e-06, "loss": 0.3439, "step": 4322 }, { "epoch": 2.0103859866687337, "grad_norm": 0.36186519265174866, "learning_rate": 2.962609460672669e-06, "loss": 0.3149, "step": 4323 }, { "epoch": 2.0108510308479306, "grad_norm": 0.3426899015903473, "learning_rate": 2.960138657121233e-06, "loss": 0.2876, "step": 4324 }, { "epoch": 2.0113160750271275, "grad_norm": 0.3653092682361603, "learning_rate": 2.957668451012049e-06, "loss": 0.347, "step": 4325 }, { "epoch": 2.0117811192063244, "grad_norm": 0.3865642547607422, "learning_rate": 2.955198843068603e-06, "loss": 0.3204, "step": 4326 }, { "epoch": 2.0122461633855218, "grad_norm": 0.4121185839176178, "learning_rate": 2.9527298340142e-06, "loss": 0.3238, "step": 4327 }, { "epoch": 2.0127112075647187, "grad_norm": 0.32982850074768066, "learning_rate": 2.950261424571977e-06, "loss": 0.289, "step": 4328 }, { "epoch": 2.0131762517439156, "grad_norm": 0.36676689982414246, "learning_rate": 2.9477936154648866e-06, "loss": 0.3467, "step": 4329 }, { "epoch": 2.0136412959231125, "grad_norm": 0.368436336517334, "learning_rate": 2.9453264074157134e-06, "loss": 0.352, "step": 4330 }, { "epoch": 2.01410634010231, "grad_norm": 0.4215182363986969, "learning_rate": 2.9428598011470597e-06, "loss": 0.3065, "step": 4331 }, { "epoch": 2.0145713842815067, "grad_norm": 0.3354952037334442, "learning_rate": 2.9403937973813564e-06, "loss": 0.3313, "step": 4332 }, { "epoch": 2.0150364284607036, "grad_norm": 0.3583640456199646, "learning_rate": 2.9379283968408546e-06, "loss": 0.2962, "step": 4333 }, { "epoch": 2.015501472639901, "grad_norm": 0.33064424991607666, "learning_rate": 2.9354636002476324e-06, "loss": 0.3023, "step": 4334 }, { "epoch": 2.015966516819098, "grad_norm": 0.33953142166137695, "learning_rate": 2.9329994083235857e-06, "loss": 0.3078, "step": 4335 }, { "epoch": 2.016431560998295, "grad_norm": 0.3762052059173584, "learning_rate": 2.930535821790439e-06, "loss": 0.3592, "step": 4336 }, { "epoch": 2.0168966051774917, "grad_norm": 0.35094988346099854, "learning_rate": 2.928072841369734e-06, "loss": 0.3177, "step": 4337 }, { "epoch": 2.017361649356689, "grad_norm": 0.3473728597164154, "learning_rate": 2.92561046778284e-06, "loss": 0.35, "step": 4338 }, { "epoch": 2.017826693535886, "grad_norm": 0.3321429491043091, "learning_rate": 2.9231487017509442e-06, "loss": 0.3325, "step": 4339 }, { "epoch": 2.018291737715083, "grad_norm": 0.3240537941455841, "learning_rate": 2.920687543995061e-06, "loss": 0.2927, "step": 4340 }, { "epoch": 2.0187567818942798, "grad_norm": 0.36483100056648254, "learning_rate": 2.91822699523602e-06, "loss": 0.3395, "step": 4341 }, { "epoch": 2.019221826073477, "grad_norm": 0.3937772512435913, "learning_rate": 2.915767056194479e-06, "loss": 0.3633, "step": 4342 }, { "epoch": 2.019686870252674, "grad_norm": 0.33440476655960083, "learning_rate": 2.9133077275909112e-06, "loss": 0.3051, "step": 4343 }, { "epoch": 2.020151914431871, "grad_norm": 0.3541061282157898, "learning_rate": 2.910849010145617e-06, "loss": 0.3395, "step": 4344 }, { "epoch": 2.0206169586110683, "grad_norm": 0.34767183661460876, "learning_rate": 2.9083909045787116e-06, "loss": 0.3267, "step": 4345 }, { "epoch": 2.021082002790265, "grad_norm": 0.3523595333099365, "learning_rate": 2.905933411610136e-06, "loss": 0.3337, "step": 4346 }, { "epoch": 2.021547046969462, "grad_norm": 0.35873571038246155, "learning_rate": 2.9034765319596497e-06, "loss": 0.3286, "step": 4347 }, { "epoch": 2.022012091148659, "grad_norm": 0.38059335947036743, "learning_rate": 2.9010202663468353e-06, "loss": 0.3044, "step": 4348 }, { "epoch": 2.0224771353278563, "grad_norm": 0.3723052144050598, "learning_rate": 2.8985646154910887e-06, "loss": 0.3459, "step": 4349 }, { "epoch": 2.0229421795070532, "grad_norm": 0.3536323308944702, "learning_rate": 2.896109580111634e-06, "loss": 0.3042, "step": 4350 }, { "epoch": 2.02340722368625, "grad_norm": 0.3543092608451843, "learning_rate": 2.8936551609275078e-06, "loss": 0.3361, "step": 4351 }, { "epoch": 2.023872267865447, "grad_norm": 0.3618990182876587, "learning_rate": 2.8912013586575733e-06, "loss": 0.3393, "step": 4352 }, { "epoch": 2.0243373120446444, "grad_norm": 0.35705238580703735, "learning_rate": 2.8887481740205046e-06, "loss": 0.3271, "step": 4353 }, { "epoch": 2.0248023562238413, "grad_norm": 0.36953264474868774, "learning_rate": 2.8862956077348054e-06, "loss": 0.2905, "step": 4354 }, { "epoch": 2.025267400403038, "grad_norm": 0.4037397801876068, "learning_rate": 2.883843660518787e-06, "loss": 0.3424, "step": 4355 }, { "epoch": 2.025732444582235, "grad_norm": 0.3392297327518463, "learning_rate": 2.881392333090589e-06, "loss": 0.3267, "step": 4356 }, { "epoch": 2.0261974887614325, "grad_norm": 0.36336854100227356, "learning_rate": 2.8789416261681624e-06, "loss": 0.3284, "step": 4357 }, { "epoch": 2.0266625329406294, "grad_norm": 0.35528960824012756, "learning_rate": 2.8764915404692805e-06, "loss": 0.3275, "step": 4358 }, { "epoch": 2.0271275771198263, "grad_norm": 0.36412903666496277, "learning_rate": 2.874042076711536e-06, "loss": 0.3135, "step": 4359 }, { "epoch": 2.0275926212990236, "grad_norm": 0.3423042595386505, "learning_rate": 2.871593235612333e-06, "loss": 0.2972, "step": 4360 }, { "epoch": 2.0280576654782205, "grad_norm": 0.36381906270980835, "learning_rate": 2.8691450178889013e-06, "loss": 0.3177, "step": 4361 }, { "epoch": 2.0285227096574174, "grad_norm": 0.36212357878685, "learning_rate": 2.8666974242582794e-06, "loss": 0.337, "step": 4362 }, { "epoch": 2.0289877538366143, "grad_norm": 0.38655251264572144, "learning_rate": 2.864250455437333e-06, "loss": 0.3042, "step": 4363 }, { "epoch": 2.0294527980158117, "grad_norm": 0.32187148928642273, "learning_rate": 2.8618041121427347e-06, "loss": 0.3092, "step": 4364 }, { "epoch": 2.0299178421950086, "grad_norm": 0.3990195691585541, "learning_rate": 2.8593583950909833e-06, "loss": 0.3392, "step": 4365 }, { "epoch": 2.0303828863742055, "grad_norm": 0.35381731390953064, "learning_rate": 2.8569133049983843e-06, "loss": 0.325, "step": 4366 }, { "epoch": 2.0308479305534024, "grad_norm": 0.3144361674785614, "learning_rate": 2.8544688425810707e-06, "loss": 0.2756, "step": 4367 }, { "epoch": 2.0313129747325998, "grad_norm": 0.4486398994922638, "learning_rate": 2.8520250085549807e-06, "loss": 0.337, "step": 4368 }, { "epoch": 2.0317780189117967, "grad_norm": 0.38106465339660645, "learning_rate": 2.8495818036358756e-06, "loss": 0.3487, "step": 4369 }, { "epoch": 2.0322430630909936, "grad_norm": 0.34248411655426025, "learning_rate": 2.8471392285393307e-06, "loss": 0.3025, "step": 4370 }, { "epoch": 2.0327081072701905, "grad_norm": 0.3708108067512512, "learning_rate": 2.8446972839807384e-06, "loss": 0.3159, "step": 4371 }, { "epoch": 2.033173151449388, "grad_norm": 0.3808947503566742, "learning_rate": 2.8422559706753004e-06, "loss": 0.2909, "step": 4372 }, { "epoch": 2.0336381956285847, "grad_norm": 0.36368271708488464, "learning_rate": 2.8398152893380426e-06, "loss": 0.3181, "step": 4373 }, { "epoch": 2.0341032398077816, "grad_norm": 0.38620734214782715, "learning_rate": 2.8373752406837963e-06, "loss": 0.3181, "step": 4374 }, { "epoch": 2.034568283986979, "grad_norm": 0.3720152974128723, "learning_rate": 2.834935825427216e-06, "loss": 0.335, "step": 4375 }, { "epoch": 2.035033328166176, "grad_norm": 0.36220601201057434, "learning_rate": 2.8324970442827627e-06, "loss": 0.3028, "step": 4376 }, { "epoch": 2.035498372345373, "grad_norm": 0.36199939250946045, "learning_rate": 2.8300588979647202e-06, "loss": 0.3198, "step": 4377 }, { "epoch": 2.0359634165245697, "grad_norm": 0.3544287085533142, "learning_rate": 2.827621387187178e-06, "loss": 0.3024, "step": 4378 }, { "epoch": 2.036428460703767, "grad_norm": 0.37387457489967346, "learning_rate": 2.825184512664048e-06, "loss": 0.3213, "step": 4379 }, { "epoch": 2.036893504882964, "grad_norm": 0.37123391032218933, "learning_rate": 2.8227482751090445e-06, "loss": 0.2892, "step": 4380 }, { "epoch": 2.037358549062161, "grad_norm": 0.34186607599258423, "learning_rate": 2.8203126752357067e-06, "loss": 0.2994, "step": 4381 }, { "epoch": 2.0378235932413578, "grad_norm": 0.34526383876800537, "learning_rate": 2.8178777137573814e-06, "loss": 0.3343, "step": 4382 }, { "epoch": 2.038288637420555, "grad_norm": 0.3409002721309662, "learning_rate": 2.8154433913872314e-06, "loss": 0.3204, "step": 4383 }, { "epoch": 2.038753681599752, "grad_norm": 0.3608821630477905, "learning_rate": 2.8130097088382256e-06, "loss": 0.3083, "step": 4384 }, { "epoch": 2.039218725778949, "grad_norm": 0.39243847131729126, "learning_rate": 2.8105766668231548e-06, "loss": 0.2993, "step": 4385 }, { "epoch": 2.039683769958146, "grad_norm": 0.34924769401550293, "learning_rate": 2.8081442660546126e-06, "loss": 0.3602, "step": 4386 }, { "epoch": 2.040148814137343, "grad_norm": 0.3145473003387451, "learning_rate": 2.8057125072450143e-06, "loss": 0.3078, "step": 4387 }, { "epoch": 2.04061385831654, "grad_norm": 0.35963907837867737, "learning_rate": 2.8032813911065795e-06, "loss": 0.3269, "step": 4388 }, { "epoch": 2.041078902495737, "grad_norm": 0.34630414843559265, "learning_rate": 2.8008509183513444e-06, "loss": 0.2996, "step": 4389 }, { "epoch": 2.0415439466749343, "grad_norm": 0.32224032282829285, "learning_rate": 2.7984210896911525e-06, "loss": 0.3007, "step": 4390 }, { "epoch": 2.0420089908541312, "grad_norm": 0.36333921551704407, "learning_rate": 2.795991905837665e-06, "loss": 0.3216, "step": 4391 }, { "epoch": 2.042474035033328, "grad_norm": 0.3574282228946686, "learning_rate": 2.793563367502346e-06, "loss": 0.282, "step": 4392 }, { "epoch": 2.042939079212525, "grad_norm": 0.3617522418498993, "learning_rate": 2.791135475396477e-06, "loss": 0.3471, "step": 4393 }, { "epoch": 2.0434041233917224, "grad_norm": 0.3639947474002838, "learning_rate": 2.7887082302311486e-06, "loss": 0.3791, "step": 4394 }, { "epoch": 2.0438691675709193, "grad_norm": 0.3631550967693329, "learning_rate": 2.786281632717264e-06, "loss": 0.292, "step": 4395 }, { "epoch": 2.044334211750116, "grad_norm": 0.3522656261920929, "learning_rate": 2.7838556835655304e-06, "loss": 0.3327, "step": 4396 }, { "epoch": 2.044799255929313, "grad_norm": 0.3434469699859619, "learning_rate": 2.781430383486468e-06, "loss": 0.3274, "step": 4397 }, { "epoch": 2.0452643001085105, "grad_norm": 0.36457687616348267, "learning_rate": 2.779005733190412e-06, "loss": 0.3083, "step": 4398 }, { "epoch": 2.0457293442877074, "grad_norm": 0.3624141812324524, "learning_rate": 2.7765817333874984e-06, "loss": 0.2975, "step": 4399 }, { "epoch": 2.0461943884669043, "grad_norm": 0.3684295415878296, "learning_rate": 2.7741583847876816e-06, "loss": 0.3205, "step": 4400 }, { "epoch": 2.046659432646101, "grad_norm": 0.3599317967891693, "learning_rate": 2.7717356881007185e-06, "loss": 0.3371, "step": 4401 }, { "epoch": 2.0471244768252985, "grad_norm": 0.3688468039035797, "learning_rate": 2.769313644036179e-06, "loss": 0.3187, "step": 4402 }, { "epoch": 2.0475895210044954, "grad_norm": 0.43112462759017944, "learning_rate": 2.766892253303438e-06, "loss": 0.3259, "step": 4403 }, { "epoch": 2.0480545651836923, "grad_norm": 0.38703638315200806, "learning_rate": 2.7644715166116835e-06, "loss": 0.2869, "step": 4404 }, { "epoch": 2.0485196093628897, "grad_norm": 0.39289000630378723, "learning_rate": 2.7620514346699103e-06, "loss": 0.3227, "step": 4405 }, { "epoch": 2.0489846535420866, "grad_norm": 0.403656542301178, "learning_rate": 2.7596320081869214e-06, "loss": 0.3242, "step": 4406 }, { "epoch": 2.0494496977212835, "grad_norm": 0.42674005031585693, "learning_rate": 2.7572132378713255e-06, "loss": 0.3151, "step": 4407 }, { "epoch": 2.0499147419004804, "grad_norm": 0.35708075761795044, "learning_rate": 2.754795124431544e-06, "loss": 0.3025, "step": 4408 }, { "epoch": 2.0503797860796777, "grad_norm": 0.41090068221092224, "learning_rate": 2.752377668575799e-06, "loss": 0.3488, "step": 4409 }, { "epoch": 2.0508448302588747, "grad_norm": 0.3659050166606903, "learning_rate": 2.749960871012129e-06, "loss": 0.3236, "step": 4410 }, { "epoch": 2.0513098744380716, "grad_norm": 0.4140544533729553, "learning_rate": 2.7475447324483697e-06, "loss": 0.317, "step": 4411 }, { "epoch": 2.0517749186172685, "grad_norm": 0.3611926734447479, "learning_rate": 2.7451292535921738e-06, "loss": 0.2912, "step": 4412 }, { "epoch": 2.052239962796466, "grad_norm": 0.3759238123893738, "learning_rate": 2.7427144351509904e-06, "loss": 0.3198, "step": 4413 }, { "epoch": 2.0527050069756627, "grad_norm": 0.37930646538734436, "learning_rate": 2.7403002778320865e-06, "loss": 0.3247, "step": 4414 }, { "epoch": 2.0531700511548596, "grad_norm": 0.3774726390838623, "learning_rate": 2.737886782342524e-06, "loss": 0.3085, "step": 4415 }, { "epoch": 2.0536350953340565, "grad_norm": 0.4020675718784332, "learning_rate": 2.735473949389179e-06, "loss": 0.2979, "step": 4416 }, { "epoch": 2.054100139513254, "grad_norm": 0.4103391766548157, "learning_rate": 2.733061779678732e-06, "loss": 0.3147, "step": 4417 }, { "epoch": 2.054565183692451, "grad_norm": 0.41063860058784485, "learning_rate": 2.7306502739176686e-06, "loss": 0.3333, "step": 4418 }, { "epoch": 2.0550302278716477, "grad_norm": 0.36508336663246155, "learning_rate": 2.728239432812277e-06, "loss": 0.3225, "step": 4419 }, { "epoch": 2.055495272050845, "grad_norm": 0.34332776069641113, "learning_rate": 2.7258292570686566e-06, "loss": 0.2756, "step": 4420 }, { "epoch": 2.055960316230042, "grad_norm": 0.400511771440506, "learning_rate": 2.7234197473927054e-06, "loss": 0.3704, "step": 4421 }, { "epoch": 2.056425360409239, "grad_norm": 0.32971513271331787, "learning_rate": 2.7210109044901335e-06, "loss": 0.2891, "step": 4422 }, { "epoch": 2.0568904045884358, "grad_norm": 0.357746422290802, "learning_rate": 2.7186027290664474e-06, "loss": 0.3439, "step": 4423 }, { "epoch": 2.057355448767633, "grad_norm": 0.355873703956604, "learning_rate": 2.716195221826967e-06, "loss": 0.329, "step": 4424 }, { "epoch": 2.05782049294683, "grad_norm": 0.3346231281757355, "learning_rate": 2.7137883834768076e-06, "loss": 0.3268, "step": 4425 }, { "epoch": 2.058285537126027, "grad_norm": 0.35958102345466614, "learning_rate": 2.711382214720898e-06, "loss": 0.3305, "step": 4426 }, { "epoch": 2.058750581305224, "grad_norm": 0.3547190725803375, "learning_rate": 2.708976716263961e-06, "loss": 0.3142, "step": 4427 }, { "epoch": 2.059215625484421, "grad_norm": 0.3430922329425812, "learning_rate": 2.7065718888105298e-06, "loss": 0.3278, "step": 4428 }, { "epoch": 2.059680669663618, "grad_norm": 0.36960989236831665, "learning_rate": 2.7041677330649408e-06, "loss": 0.3641, "step": 4429 }, { "epoch": 2.060145713842815, "grad_norm": 0.3565279543399811, "learning_rate": 2.7017642497313324e-06, "loss": 0.3027, "step": 4430 }, { "epoch": 2.060610758022012, "grad_norm": 0.3367528021335602, "learning_rate": 2.6993614395136454e-06, "loss": 0.3133, "step": 4431 }, { "epoch": 2.0610758022012092, "grad_norm": 0.35852697491645813, "learning_rate": 2.6969593031156205e-06, "loss": 0.342, "step": 4432 }, { "epoch": 2.061540846380406, "grad_norm": 0.34888017177581787, "learning_rate": 2.694557841240809e-06, "loss": 0.3282, "step": 4433 }, { "epoch": 2.062005890559603, "grad_norm": 0.35499727725982666, "learning_rate": 2.692157054592557e-06, "loss": 0.2948, "step": 4434 }, { "epoch": 2.0624709347388004, "grad_norm": 0.33716249465942383, "learning_rate": 2.689756943874019e-06, "loss": 0.2994, "step": 4435 }, { "epoch": 2.0629359789179973, "grad_norm": 0.33978933095932007, "learning_rate": 2.687357509788143e-06, "loss": 0.3069, "step": 4436 }, { "epoch": 2.063401023097194, "grad_norm": 0.3515429198741913, "learning_rate": 2.684958753037691e-06, "loss": 0.3488, "step": 4437 }, { "epoch": 2.063866067276391, "grad_norm": 0.3632570207118988, "learning_rate": 2.682560674325215e-06, "loss": 0.326, "step": 4438 }, { "epoch": 2.0643311114555885, "grad_norm": 0.36300498247146606, "learning_rate": 2.680163274353075e-06, "loss": 0.2862, "step": 4439 }, { "epoch": 2.0647961556347854, "grad_norm": 0.3675801753997803, "learning_rate": 2.6777665538234292e-06, "loss": 0.3299, "step": 4440 }, { "epoch": 2.0652611998139823, "grad_norm": 0.3985215127468109, "learning_rate": 2.6753705134382425e-06, "loss": 0.3205, "step": 4441 }, { "epoch": 2.065726243993179, "grad_norm": 0.35572221875190735, "learning_rate": 2.6729751538992704e-06, "loss": 0.3302, "step": 4442 }, { "epoch": 2.0661912881723765, "grad_norm": 0.35556313395500183, "learning_rate": 2.67058047590808e-06, "loss": 0.2882, "step": 4443 }, { "epoch": 2.0666563323515734, "grad_norm": 0.4020705819129944, "learning_rate": 2.6681864801660284e-06, "loss": 0.3273, "step": 4444 }, { "epoch": 2.0671213765307703, "grad_norm": 0.40115731954574585, "learning_rate": 2.6657931673742834e-06, "loss": 0.3591, "step": 4445 }, { "epoch": 2.0675864207099672, "grad_norm": 0.320512592792511, "learning_rate": 2.6634005382338025e-06, "loss": 0.2962, "step": 4446 }, { "epoch": 2.0680514648891646, "grad_norm": 0.35881075263023376, "learning_rate": 2.6610085934453523e-06, "loss": 0.3679, "step": 4447 }, { "epoch": 2.0685165090683615, "grad_norm": 0.35448718070983887, "learning_rate": 2.6586173337094904e-06, "loss": 0.2736, "step": 4448 }, { "epoch": 2.0689815532475584, "grad_norm": 0.378061980009079, "learning_rate": 2.656226759726582e-06, "loss": 0.3526, "step": 4449 }, { "epoch": 2.0694465974267557, "grad_norm": 0.3106938600540161, "learning_rate": 2.6538368721967838e-06, "loss": 0.2833, "step": 4450 }, { "epoch": 2.0699116416059526, "grad_norm": 0.3516807556152344, "learning_rate": 2.6514476718200566e-06, "loss": 0.3392, "step": 4451 }, { "epoch": 2.0703766857851496, "grad_norm": 0.3462466299533844, "learning_rate": 2.649059159296158e-06, "loss": 0.3042, "step": 4452 }, { "epoch": 2.0708417299643465, "grad_norm": 0.36509615182876587, "learning_rate": 2.646671335324647e-06, "loss": 0.3551, "step": 4453 }, { "epoch": 2.071306774143544, "grad_norm": 0.347945898771286, "learning_rate": 2.644284200604874e-06, "loss": 0.3431, "step": 4454 }, { "epoch": 2.0717718183227407, "grad_norm": 0.3239026963710785, "learning_rate": 2.641897755835997e-06, "loss": 0.2844, "step": 4455 }, { "epoch": 2.0722368625019376, "grad_norm": 0.35373586416244507, "learning_rate": 2.6395120017169627e-06, "loss": 0.3261, "step": 4456 }, { "epoch": 2.0727019066811345, "grad_norm": 0.3564927875995636, "learning_rate": 2.6371269389465227e-06, "loss": 0.3213, "step": 4457 }, { "epoch": 2.073166950860332, "grad_norm": 0.3221034109592438, "learning_rate": 2.6347425682232196e-06, "loss": 0.2954, "step": 4458 }, { "epoch": 2.0736319950395288, "grad_norm": 0.36184707283973694, "learning_rate": 2.6323588902454013e-06, "loss": 0.3331, "step": 4459 }, { "epoch": 2.0740970392187257, "grad_norm": 0.3728765845298767, "learning_rate": 2.629975905711204e-06, "loss": 0.3231, "step": 4460 }, { "epoch": 2.0745620833979226, "grad_norm": 0.35533544421195984, "learning_rate": 2.6275936153185694e-06, "loss": 0.3132, "step": 4461 }, { "epoch": 2.07502712757712, "grad_norm": 0.337354451417923, "learning_rate": 2.6252120197652277e-06, "loss": 0.2933, "step": 4462 }, { "epoch": 2.075492171756317, "grad_norm": 0.33192774653434753, "learning_rate": 2.622831119748711e-06, "loss": 0.3354, "step": 4463 }, { "epoch": 2.0759572159355137, "grad_norm": 0.36651748418807983, "learning_rate": 2.620450915966346e-06, "loss": 0.3294, "step": 4464 }, { "epoch": 2.076422260114711, "grad_norm": 0.3576674461364746, "learning_rate": 2.618071409115259e-06, "loss": 0.3174, "step": 4465 }, { "epoch": 2.076887304293908, "grad_norm": 0.33194971084594727, "learning_rate": 2.615692599892364e-06, "loss": 0.3175, "step": 4466 }, { "epoch": 2.077352348473105, "grad_norm": 0.40041840076446533, "learning_rate": 2.6133144889943808e-06, "loss": 0.317, "step": 4467 }, { "epoch": 2.077817392652302, "grad_norm": 0.39717933535575867, "learning_rate": 2.6109370771178155e-06, "loss": 0.3476, "step": 4468 }, { "epoch": 2.078282436831499, "grad_norm": 0.35431498289108276, "learning_rate": 2.6085603649589723e-06, "loss": 0.3019, "step": 4469 }, { "epoch": 2.078747481010696, "grad_norm": 0.3609691560268402, "learning_rate": 2.6061843532139563e-06, "loss": 0.3173, "step": 4470 }, { "epoch": 2.079212525189893, "grad_norm": 0.37636587023735046, "learning_rate": 2.6038090425786577e-06, "loss": 0.3253, "step": 4471 }, { "epoch": 2.07967756936909, "grad_norm": 0.34753209352493286, "learning_rate": 2.601434433748771e-06, "loss": 0.2982, "step": 4472 }, { "epoch": 2.0801426135482872, "grad_norm": 0.3801688849925995, "learning_rate": 2.5990605274197763e-06, "loss": 0.336, "step": 4473 }, { "epoch": 2.080607657727484, "grad_norm": 0.37925639748573303, "learning_rate": 2.596687324286954e-06, "loss": 0.3277, "step": 4474 }, { "epoch": 2.081072701906681, "grad_norm": 0.3775731027126312, "learning_rate": 2.5943148250453774e-06, "loss": 0.3043, "step": 4475 }, { "epoch": 2.0815377460858784, "grad_norm": 0.37141674757003784, "learning_rate": 2.5919430303899144e-06, "loss": 0.3141, "step": 4476 }, { "epoch": 2.0820027902650753, "grad_norm": 0.38826245069503784, "learning_rate": 2.589571941015222e-06, "loss": 0.312, "step": 4477 }, { "epoch": 2.082467834444272, "grad_norm": 0.3552106022834778, "learning_rate": 2.587201557615756e-06, "loss": 0.2905, "step": 4478 }, { "epoch": 2.082932878623469, "grad_norm": 0.40252459049224854, "learning_rate": 2.584831880885761e-06, "loss": 0.3681, "step": 4479 }, { "epoch": 2.0833979228026664, "grad_norm": 0.35997259616851807, "learning_rate": 2.58246291151928e-06, "loss": 0.3051, "step": 4480 }, { "epoch": 2.0838629669818634, "grad_norm": 0.3523218333721161, "learning_rate": 2.580094650210142e-06, "loss": 0.2918, "step": 4481 }, { "epoch": 2.0843280111610603, "grad_norm": 0.4533151090145111, "learning_rate": 2.577727097651976e-06, "loss": 0.3553, "step": 4482 }, { "epoch": 2.084793055340257, "grad_norm": 0.35789787769317627, "learning_rate": 2.575360254538195e-06, "loss": 0.2959, "step": 4483 }, { "epoch": 2.0852580995194545, "grad_norm": 0.39699408411979675, "learning_rate": 2.5729941215620148e-06, "loss": 0.3356, "step": 4484 }, { "epoch": 2.0857231436986514, "grad_norm": 0.3448418974876404, "learning_rate": 2.5706286994164315e-06, "loss": 0.2892, "step": 4485 }, { "epoch": 2.0861881878778483, "grad_norm": 0.3884351849555969, "learning_rate": 2.568263988794242e-06, "loss": 0.3183, "step": 4486 }, { "epoch": 2.0866532320570452, "grad_norm": 0.4005531072616577, "learning_rate": 2.56589999038803e-06, "loss": 0.3394, "step": 4487 }, { "epoch": 2.0871182762362426, "grad_norm": 0.3274531960487366, "learning_rate": 2.563536704890176e-06, "loss": 0.2915, "step": 4488 }, { "epoch": 2.0875833204154395, "grad_norm": 0.418525367975235, "learning_rate": 2.5611741329928436e-06, "loss": 0.347, "step": 4489 }, { "epoch": 2.0880483645946364, "grad_norm": 0.3738149106502533, "learning_rate": 2.558812275387995e-06, "loss": 0.3231, "step": 4490 }, { "epoch": 2.0885134087738333, "grad_norm": 0.3588385581970215, "learning_rate": 2.556451132767377e-06, "loss": 0.3281, "step": 4491 }, { "epoch": 2.0889784529530306, "grad_norm": 0.3635399639606476, "learning_rate": 2.554090705822533e-06, "loss": 0.3367, "step": 4492 }, { "epoch": 2.0894434971322275, "grad_norm": 0.342986136674881, "learning_rate": 2.5517309952447887e-06, "loss": 0.3051, "step": 4493 }, { "epoch": 2.0899085413114245, "grad_norm": 0.3733537495136261, "learning_rate": 2.549372001725272e-06, "loss": 0.3154, "step": 4494 }, { "epoch": 2.090373585490622, "grad_norm": 0.404184490442276, "learning_rate": 2.547013725954887e-06, "loss": 0.3149, "step": 4495 }, { "epoch": 2.0908386296698187, "grad_norm": 0.39761337637901306, "learning_rate": 2.5446561686243397e-06, "loss": 0.3353, "step": 4496 }, { "epoch": 2.0913036738490156, "grad_norm": 0.36512845754623413, "learning_rate": 2.5422993304241163e-06, "loss": 0.3217, "step": 4497 }, { "epoch": 2.0917687180282125, "grad_norm": 0.35249972343444824, "learning_rate": 2.5399432120444985e-06, "loss": 0.2946, "step": 4498 }, { "epoch": 2.09223376220741, "grad_norm": 0.3695544898509979, "learning_rate": 2.537587814175554e-06, "loss": 0.3332, "step": 4499 }, { "epoch": 2.0926988063866068, "grad_norm": 0.3467404246330261, "learning_rate": 2.5352331375071437e-06, "loss": 0.3147, "step": 4500 }, { "epoch": 2.0931638505658037, "grad_norm": 0.3637906610965729, "learning_rate": 2.53287918272891e-06, "loss": 0.3054, "step": 4501 }, { "epoch": 2.0936288947450006, "grad_norm": 0.3547385036945343, "learning_rate": 2.5305259505302914e-06, "loss": 0.3231, "step": 4502 }, { "epoch": 2.094093938924198, "grad_norm": 0.36574193835258484, "learning_rate": 2.5281734416005107e-06, "loss": 0.3125, "step": 4503 }, { "epoch": 2.094558983103395, "grad_norm": 0.3319716155529022, "learning_rate": 2.5258216566285758e-06, "loss": 0.3083, "step": 4504 }, { "epoch": 2.0950240272825917, "grad_norm": 0.3524357080459595, "learning_rate": 2.5234705963032917e-06, "loss": 0.3457, "step": 4505 }, { "epoch": 2.095489071461789, "grad_norm": 0.34479501843452454, "learning_rate": 2.5211202613132413e-06, "loss": 0.3415, "step": 4506 }, { "epoch": 2.095954115640986, "grad_norm": 0.3542589247226715, "learning_rate": 2.5187706523468034e-06, "loss": 0.3051, "step": 4507 }, { "epoch": 2.096419159820183, "grad_norm": 0.3570210337638855, "learning_rate": 2.516421770092136e-06, "loss": 0.3496, "step": 4508 }, { "epoch": 2.09688420399938, "grad_norm": 0.3366619050502777, "learning_rate": 2.5140736152371916e-06, "loss": 0.3166, "step": 4509 }, { "epoch": 2.097349248178577, "grad_norm": 0.3258604407310486, "learning_rate": 2.5117261884697066e-06, "loss": 0.2912, "step": 4510 }, { "epoch": 2.097814292357774, "grad_norm": 0.3651737868785858, "learning_rate": 2.509379490477204e-06, "loss": 0.3397, "step": 4511 }, { "epoch": 2.098279336536971, "grad_norm": 0.335183322429657, "learning_rate": 2.507033521946992e-06, "loss": 0.2965, "step": 4512 }, { "epoch": 2.098744380716168, "grad_norm": 0.37854668498039246, "learning_rate": 2.5046882835661694e-06, "loss": 0.3288, "step": 4513 }, { "epoch": 2.099209424895365, "grad_norm": 0.39023512601852417, "learning_rate": 2.502343776021615e-06, "loss": 0.3475, "step": 4514 }, { "epoch": 2.099674469074562, "grad_norm": 0.37429752945899963, "learning_rate": 2.5000000000000015e-06, "loss": 0.325, "step": 4515 }, { "epoch": 2.100139513253759, "grad_norm": 0.3756406605243683, "learning_rate": 2.4976569561877774e-06, "loss": 0.327, "step": 4516 }, { "epoch": 2.100604557432956, "grad_norm": 0.36603933572769165, "learning_rate": 2.4953146452711866e-06, "loss": 0.341, "step": 4517 }, { "epoch": 2.1010696016121533, "grad_norm": 0.34370267391204834, "learning_rate": 2.492973067936251e-06, "loss": 0.317, "step": 4518 }, { "epoch": 2.10153464579135, "grad_norm": 0.40021899342536926, "learning_rate": 2.490632224868783e-06, "loss": 0.3222, "step": 4519 }, { "epoch": 2.101999689970547, "grad_norm": 0.39490172266960144, "learning_rate": 2.4882921167543745e-06, "loss": 0.3088, "step": 4520 }, { "epoch": 2.102464734149744, "grad_norm": 0.37795066833496094, "learning_rate": 2.485952744278407e-06, "loss": 0.3156, "step": 4521 }, { "epoch": 2.1029297783289413, "grad_norm": 0.3622766435146332, "learning_rate": 2.483614108126045e-06, "loss": 0.3002, "step": 4522 }, { "epoch": 2.1033948225081383, "grad_norm": 0.33893483877182007, "learning_rate": 2.4812762089822384e-06, "loss": 0.3183, "step": 4523 }, { "epoch": 2.103859866687335, "grad_norm": 0.3871879279613495, "learning_rate": 2.478939047531716e-06, "loss": 0.3391, "step": 4524 }, { "epoch": 2.1043249108665325, "grad_norm": 0.34787362813949585, "learning_rate": 2.4766026244589986e-06, "loss": 0.2944, "step": 4525 }, { "epoch": 2.1047899550457294, "grad_norm": 0.36640048027038574, "learning_rate": 2.4742669404483825e-06, "loss": 0.3368, "step": 4526 }, { "epoch": 2.1052549992249263, "grad_norm": 0.36439046263694763, "learning_rate": 2.471931996183956e-06, "loss": 0.3047, "step": 4527 }, { "epoch": 2.105720043404123, "grad_norm": 0.33959466218948364, "learning_rate": 2.4695977923495816e-06, "loss": 0.2989, "step": 4528 }, { "epoch": 2.1061850875833206, "grad_norm": 0.3902963697910309, "learning_rate": 2.4672643296289145e-06, "loss": 0.3183, "step": 4529 }, { "epoch": 2.1066501317625175, "grad_norm": 0.4295981228351593, "learning_rate": 2.464931608705384e-06, "loss": 0.2918, "step": 4530 }, { "epoch": 2.1071151759417144, "grad_norm": 0.36755356192588806, "learning_rate": 2.462599630262209e-06, "loss": 0.324, "step": 4531 }, { "epoch": 2.1075802201209113, "grad_norm": 0.3295392394065857, "learning_rate": 2.4602683949823853e-06, "loss": 0.3026, "step": 4532 }, { "epoch": 2.1080452643001086, "grad_norm": 0.34905338287353516, "learning_rate": 2.457937903548695e-06, "loss": 0.3265, "step": 4533 }, { "epoch": 2.1085103084793055, "grad_norm": 0.37717199325561523, "learning_rate": 2.4556081566437025e-06, "loss": 0.3326, "step": 4534 }, { "epoch": 2.1089753526585024, "grad_norm": 0.34889674186706543, "learning_rate": 2.453279154949753e-06, "loss": 0.3113, "step": 4535 }, { "epoch": 2.1094403968377, "grad_norm": 0.35179656744003296, "learning_rate": 2.4509508991489704e-06, "loss": 0.3188, "step": 4536 }, { "epoch": 2.1099054410168967, "grad_norm": 0.35203817486763, "learning_rate": 2.4486233899232674e-06, "loss": 0.3106, "step": 4537 }, { "epoch": 2.1103704851960936, "grad_norm": 0.3592386841773987, "learning_rate": 2.4462966279543287e-06, "loss": 0.3241, "step": 4538 }, { "epoch": 2.1108355293752905, "grad_norm": 0.37422919273376465, "learning_rate": 2.4439706139236295e-06, "loss": 0.2891, "step": 4539 }, { "epoch": 2.111300573554488, "grad_norm": 0.37109753489494324, "learning_rate": 2.4416453485124196e-06, "loss": 0.3353, "step": 4540 }, { "epoch": 2.1117656177336848, "grad_norm": 0.35821226239204407, "learning_rate": 2.4393208324017294e-06, "loss": 0.3409, "step": 4541 }, { "epoch": 2.1122306619128817, "grad_norm": 0.3365572690963745, "learning_rate": 2.4369970662723756e-06, "loss": 0.2886, "step": 4542 }, { "epoch": 2.1126957060920786, "grad_norm": 0.3579605221748352, "learning_rate": 2.4346740508049484e-06, "loss": 0.3297, "step": 4543 }, { "epoch": 2.113160750271276, "grad_norm": 0.32147571444511414, "learning_rate": 2.432351786679822e-06, "loss": 0.2968, "step": 4544 }, { "epoch": 2.113625794450473, "grad_norm": 0.34730449318885803, "learning_rate": 2.430030274577151e-06, "loss": 0.3432, "step": 4545 }, { "epoch": 2.1140908386296697, "grad_norm": 0.3238038718700409, "learning_rate": 2.4277095151768698e-06, "loss": 0.2922, "step": 4546 }, { "epoch": 2.1145558828088666, "grad_norm": 0.3770063817501068, "learning_rate": 2.4253895091586883e-06, "loss": 0.2981, "step": 4547 }, { "epoch": 2.115020926988064, "grad_norm": 0.39007771015167236, "learning_rate": 2.423070257202101e-06, "loss": 0.3372, "step": 4548 }, { "epoch": 2.115485971167261, "grad_norm": 0.3754151463508606, "learning_rate": 2.420751759986376e-06, "loss": 0.318, "step": 4549 }, { "epoch": 2.115951015346458, "grad_norm": 0.3623104989528656, "learning_rate": 2.4184340181905675e-06, "loss": 0.3214, "step": 4550 }, { "epoch": 2.116416059525655, "grad_norm": 0.34606826305389404, "learning_rate": 2.4161170324935e-06, "loss": 0.3351, "step": 4551 }, { "epoch": 2.116881103704852, "grad_norm": 0.36132997274398804, "learning_rate": 2.4138008035737858e-06, "loss": 0.3475, "step": 4552 }, { "epoch": 2.117346147884049, "grad_norm": 0.36312153935432434, "learning_rate": 2.411485332109806e-06, "loss": 0.3111, "step": 4553 }, { "epoch": 2.117811192063246, "grad_norm": 0.38179275393486023, "learning_rate": 2.4091706187797286e-06, "loss": 0.321, "step": 4554 }, { "epoch": 2.118276236242443, "grad_norm": 0.37554097175598145, "learning_rate": 2.4068566642614923e-06, "loss": 0.2791, "step": 4555 }, { "epoch": 2.11874128042164, "grad_norm": 0.3833089768886566, "learning_rate": 2.4045434692328172e-06, "loss": 0.3357, "step": 4556 }, { "epoch": 2.119206324600837, "grad_norm": 0.35031166672706604, "learning_rate": 2.4022310343712022e-06, "loss": 0.2867, "step": 4557 }, { "epoch": 2.119671368780034, "grad_norm": 0.37646663188934326, "learning_rate": 2.3999193603539234e-06, "loss": 0.3037, "step": 4558 }, { "epoch": 2.1201364129592313, "grad_norm": 0.34302130341529846, "learning_rate": 2.3976084478580282e-06, "loss": 0.3407, "step": 4559 }, { "epoch": 2.120601457138428, "grad_norm": 0.336687296628952, "learning_rate": 2.3952982975603494e-06, "loss": 0.2947, "step": 4560 }, { "epoch": 2.121066501317625, "grad_norm": 0.3723877966403961, "learning_rate": 2.3929889101374887e-06, "loss": 0.3331, "step": 4561 }, { "epoch": 2.121531545496822, "grad_norm": 0.369495153427124, "learning_rate": 2.3906802862658325e-06, "loss": 0.3428, "step": 4562 }, { "epoch": 2.1219965896760193, "grad_norm": 0.3618394136428833, "learning_rate": 2.388372426621534e-06, "loss": 0.3158, "step": 4563 }, { "epoch": 2.1224616338552162, "grad_norm": 0.34468409419059753, "learning_rate": 2.386065331880534e-06, "loss": 0.3068, "step": 4564 }, { "epoch": 2.122926678034413, "grad_norm": 0.3602186441421509, "learning_rate": 2.3837590027185364e-06, "loss": 0.326, "step": 4565 }, { "epoch": 2.1233917222136105, "grad_norm": 0.339193195104599, "learning_rate": 2.381453439811034e-06, "loss": 0.3237, "step": 4566 }, { "epoch": 2.1238567663928074, "grad_norm": 0.3470028340816498, "learning_rate": 2.379148643833283e-06, "loss": 0.3042, "step": 4567 }, { "epoch": 2.1243218105720043, "grad_norm": 0.35298952460289, "learning_rate": 2.3768446154603234e-06, "loss": 0.3114, "step": 4568 }, { "epoch": 2.124786854751201, "grad_norm": 0.39458978176116943, "learning_rate": 2.3745413553669678e-06, "loss": 0.331, "step": 4569 }, { "epoch": 2.1252518989303986, "grad_norm": 0.36957892775535583, "learning_rate": 2.3722388642278047e-06, "loss": 0.2894, "step": 4570 }, { "epoch": 2.1257169431095955, "grad_norm": 0.38413190841674805, "learning_rate": 2.369937142717194e-06, "loss": 0.3403, "step": 4571 }, { "epoch": 2.1261819872887924, "grad_norm": 0.32337597012519836, "learning_rate": 2.3676361915092757e-06, "loss": 0.2965, "step": 4572 }, { "epoch": 2.1266470314679893, "grad_norm": 0.3725772500038147, "learning_rate": 2.3653360112779567e-06, "loss": 0.3541, "step": 4573 }, { "epoch": 2.1271120756471866, "grad_norm": 0.3611546456813812, "learning_rate": 2.363036602696927e-06, "loss": 0.2821, "step": 4574 }, { "epoch": 2.1275771198263835, "grad_norm": 0.362352192401886, "learning_rate": 2.3607379664396414e-06, "loss": 0.3477, "step": 4575 }, { "epoch": 2.1280421640055804, "grad_norm": 0.3450590968132019, "learning_rate": 2.3584401031793377e-06, "loss": 0.2978, "step": 4576 }, { "epoch": 2.128507208184778, "grad_norm": 0.4075535535812378, "learning_rate": 2.35614301358902e-06, "loss": 0.358, "step": 4577 }, { "epoch": 2.1289722523639747, "grad_norm": 0.3503515422344208, "learning_rate": 2.353846698341468e-06, "loss": 0.3289, "step": 4578 }, { "epoch": 2.1294372965431716, "grad_norm": 0.38733020424842834, "learning_rate": 2.351551158109235e-06, "loss": 0.2964, "step": 4579 }, { "epoch": 2.1299023407223685, "grad_norm": 0.35857686400413513, "learning_rate": 2.3492563935646493e-06, "loss": 0.3276, "step": 4580 }, { "epoch": 2.130367384901566, "grad_norm": 0.34984397888183594, "learning_rate": 2.3469624053798117e-06, "loss": 0.3162, "step": 4581 }, { "epoch": 2.1308324290807628, "grad_norm": 0.34331047534942627, "learning_rate": 2.34466919422659e-06, "loss": 0.309, "step": 4582 }, { "epoch": 2.1312974732599597, "grad_norm": 0.32851484417915344, "learning_rate": 2.3423767607766316e-06, "loss": 0.3186, "step": 4583 }, { "epoch": 2.1317625174391566, "grad_norm": 0.360724538564682, "learning_rate": 2.34008510570135e-06, "loss": 0.2974, "step": 4584 }, { "epoch": 2.132227561618354, "grad_norm": 0.3493599593639374, "learning_rate": 2.337794229671938e-06, "loss": 0.3563, "step": 4585 }, { "epoch": 2.132692605797551, "grad_norm": 0.3586118519306183, "learning_rate": 2.3355041333593507e-06, "loss": 0.3515, "step": 4586 }, { "epoch": 2.1331576499767477, "grad_norm": 0.3299708664417267, "learning_rate": 2.3332148174343257e-06, "loss": 0.3117, "step": 4587 }, { "epoch": 2.1336226941559446, "grad_norm": 0.31566065549850464, "learning_rate": 2.3309262825673616e-06, "loss": 0.3165, "step": 4588 }, { "epoch": 2.134087738335142, "grad_norm": 0.35169747471809387, "learning_rate": 2.3286385294287367e-06, "loss": 0.3247, "step": 4589 }, { "epoch": 2.134552782514339, "grad_norm": 0.3681233525276184, "learning_rate": 2.3263515586884935e-06, "loss": 0.3373, "step": 4590 }, { "epoch": 2.135017826693536, "grad_norm": 0.3795618414878845, "learning_rate": 2.32406537101645e-06, "loss": 0.3029, "step": 4591 }, { "epoch": 2.1354828708727327, "grad_norm": 0.3609673082828522, "learning_rate": 2.3217799670821938e-06, "loss": 0.3362, "step": 4592 }, { "epoch": 2.13594791505193, "grad_norm": 0.3580998480319977, "learning_rate": 2.3194953475550846e-06, "loss": 0.3337, "step": 4593 }, { "epoch": 2.136412959231127, "grad_norm": 0.33573347330093384, "learning_rate": 2.3172115131042466e-06, "loss": 0.2821, "step": 4594 }, { "epoch": 2.136878003410324, "grad_norm": 0.3740091919898987, "learning_rate": 2.314928464398581e-06, "loss": 0.3662, "step": 4595 }, { "epoch": 2.137343047589521, "grad_norm": 0.385172963142395, "learning_rate": 2.3126462021067518e-06, "loss": 0.3709, "step": 4596 }, { "epoch": 2.137808091768718, "grad_norm": 0.3355659246444702, "learning_rate": 2.310364726897202e-06, "loss": 0.3105, "step": 4597 }, { "epoch": 2.138273135947915, "grad_norm": 0.36844390630722046, "learning_rate": 2.3080840394381327e-06, "loss": 0.3155, "step": 4598 }, { "epoch": 2.138738180127112, "grad_norm": 0.3672247529029846, "learning_rate": 2.305804140397525e-06, "loss": 0.3089, "step": 4599 }, { "epoch": 2.1392032243063093, "grad_norm": 0.364468514919281, "learning_rate": 2.3035250304431206e-06, "loss": 0.3194, "step": 4600 }, { "epoch": 2.139668268485506, "grad_norm": 0.37167108058929443, "learning_rate": 2.3012467102424373e-06, "loss": 0.3588, "step": 4601 }, { "epoch": 2.140133312664703, "grad_norm": 0.3726482689380646, "learning_rate": 2.2989691804627544e-06, "loss": 0.3193, "step": 4602 }, { "epoch": 2.1405983568439, "grad_norm": 0.3460210859775543, "learning_rate": 2.296692441771125e-06, "loss": 0.3113, "step": 4603 }, { "epoch": 2.1410634010230973, "grad_norm": 0.3563862442970276, "learning_rate": 2.29441649483437e-06, "loss": 0.3405, "step": 4604 }, { "epoch": 2.1415284452022942, "grad_norm": 0.3387638032436371, "learning_rate": 2.2921413403190774e-06, "loss": 0.3125, "step": 4605 }, { "epoch": 2.141993489381491, "grad_norm": 0.38365113735198975, "learning_rate": 2.2898669788916006e-06, "loss": 0.3201, "step": 4606 }, { "epoch": 2.1424585335606885, "grad_norm": 0.369579553604126, "learning_rate": 2.2875934112180664e-06, "loss": 0.3501, "step": 4607 }, { "epoch": 2.1429235777398854, "grad_norm": 0.36825814843177795, "learning_rate": 2.285320637964362e-06, "loss": 0.3147, "step": 4608 }, { "epoch": 2.1433886219190823, "grad_norm": 0.3360195457935333, "learning_rate": 2.2830486597961504e-06, "loss": 0.2841, "step": 4609 }, { "epoch": 2.143853666098279, "grad_norm": 0.3493092358112335, "learning_rate": 2.2807774773788518e-06, "loss": 0.3183, "step": 4610 }, { "epoch": 2.1443187102774766, "grad_norm": 0.3550946116447449, "learning_rate": 2.2785070913776635e-06, "loss": 0.3226, "step": 4611 }, { "epoch": 2.1447837544566735, "grad_norm": 0.3619578778743744, "learning_rate": 2.2762375024575424e-06, "loss": 0.3189, "step": 4612 }, { "epoch": 2.1452487986358704, "grad_norm": 0.388798326253891, "learning_rate": 2.2739687112832125e-06, "loss": 0.3142, "step": 4613 }, { "epoch": 2.1457138428150673, "grad_norm": 0.3689102530479431, "learning_rate": 2.2717007185191673e-06, "loss": 0.3336, "step": 4614 }, { "epoch": 2.1461788869942646, "grad_norm": 0.3550708591938019, "learning_rate": 2.269433524829666e-06, "loss": 0.3183, "step": 4615 }, { "epoch": 2.1466439311734615, "grad_norm": 0.3319764733314514, "learning_rate": 2.267167130878734e-06, "loss": 0.3024, "step": 4616 }, { "epoch": 2.1471089753526584, "grad_norm": 0.35746893286705017, "learning_rate": 2.2649015373301574e-06, "loss": 0.2984, "step": 4617 }, { "epoch": 2.1475740195318553, "grad_norm": 0.3471658527851105, "learning_rate": 2.2626367448474963e-06, "loss": 0.3148, "step": 4618 }, { "epoch": 2.1480390637110527, "grad_norm": 0.3576483130455017, "learning_rate": 2.2603727540940673e-06, "loss": 0.3567, "step": 4619 }, { "epoch": 2.1485041078902496, "grad_norm": 0.31263265013694763, "learning_rate": 2.25810956573296e-06, "loss": 0.294, "step": 4620 }, { "epoch": 2.1489691520694465, "grad_norm": 0.3338179588317871, "learning_rate": 2.255847180427022e-06, "loss": 0.319, "step": 4621 }, { "epoch": 2.1494341962486434, "grad_norm": 0.33670803904533386, "learning_rate": 2.2535855988388734e-06, "loss": 0.3338, "step": 4622 }, { "epoch": 2.1498992404278408, "grad_norm": 0.3188488185405731, "learning_rate": 2.2513248216308897e-06, "loss": 0.2784, "step": 4623 }, { "epoch": 2.1503642846070377, "grad_norm": 0.40295830368995667, "learning_rate": 2.249064849465221e-06, "loss": 0.3436, "step": 4624 }, { "epoch": 2.1508293287862346, "grad_norm": 0.35394805669784546, "learning_rate": 2.2468056830037725e-06, "loss": 0.3132, "step": 4625 }, { "epoch": 2.151294372965432, "grad_norm": 0.3387957215309143, "learning_rate": 2.2445473229082186e-06, "loss": 0.3198, "step": 4626 }, { "epoch": 2.151759417144629, "grad_norm": 0.34971579909324646, "learning_rate": 2.2422897698399964e-06, "loss": 0.2797, "step": 4627 }, { "epoch": 2.1522244613238257, "grad_norm": 0.4021191895008087, "learning_rate": 2.240033024460309e-06, "loss": 0.3572, "step": 4628 }, { "epoch": 2.1526895055030226, "grad_norm": 0.34752318263053894, "learning_rate": 2.2377770874301157e-06, "loss": 0.3026, "step": 4629 }, { "epoch": 2.15315454968222, "grad_norm": 0.338008850812912, "learning_rate": 2.2355219594101483e-06, "loss": 0.3024, "step": 4630 }, { "epoch": 2.153619593861417, "grad_norm": 0.331898033618927, "learning_rate": 2.2332676410608937e-06, "loss": 0.3002, "step": 4631 }, { "epoch": 2.154084638040614, "grad_norm": 0.36906805634498596, "learning_rate": 2.231014133042608e-06, "loss": 0.3276, "step": 4632 }, { "epoch": 2.1545496822198107, "grad_norm": 0.35603275895118713, "learning_rate": 2.2287614360153042e-06, "loss": 0.3409, "step": 4633 }, { "epoch": 2.155014726399008, "grad_norm": 0.3487357199192047, "learning_rate": 2.226509550638764e-06, "loss": 0.326, "step": 4634 }, { "epoch": 2.155479770578205, "grad_norm": 0.3237025737762451, "learning_rate": 2.224258477572524e-06, "loss": 0.2981, "step": 4635 }, { "epoch": 2.155944814757402, "grad_norm": 0.35809311270713806, "learning_rate": 2.222008217475891e-06, "loss": 0.307, "step": 4636 }, { "epoch": 2.156409858936599, "grad_norm": 0.3373444080352783, "learning_rate": 2.219758771007926e-06, "loss": 0.3359, "step": 4637 }, { "epoch": 2.156874903115796, "grad_norm": 0.36277028918266296, "learning_rate": 2.217510138827457e-06, "loss": 0.3766, "step": 4638 }, { "epoch": 2.157339947294993, "grad_norm": 0.32372814416885376, "learning_rate": 2.215262321593072e-06, "loss": 0.2788, "step": 4639 }, { "epoch": 2.15780499147419, "grad_norm": 0.37135806679725647, "learning_rate": 2.2130153199631214e-06, "loss": 0.3146, "step": 4640 }, { "epoch": 2.1582700356533873, "grad_norm": 0.3312724828720093, "learning_rate": 2.2107691345957133e-06, "loss": 0.3261, "step": 4641 }, { "epoch": 2.158735079832584, "grad_norm": 0.3527110517024994, "learning_rate": 2.208523766148721e-06, "loss": 0.3189, "step": 4642 }, { "epoch": 2.159200124011781, "grad_norm": 0.3383033275604248, "learning_rate": 2.2062792152797733e-06, "loss": 0.3293, "step": 4643 }, { "epoch": 2.159665168190978, "grad_norm": 0.4051493704319, "learning_rate": 2.204035482646267e-06, "loss": 0.3627, "step": 4644 }, { "epoch": 2.1601302123701753, "grad_norm": 0.3591381311416626, "learning_rate": 2.20179256890535e-06, "loss": 0.2986, "step": 4645 }, { "epoch": 2.1605952565493722, "grad_norm": 0.38014209270477295, "learning_rate": 2.1995504747139397e-06, "loss": 0.306, "step": 4646 }, { "epoch": 2.161060300728569, "grad_norm": 0.33583784103393555, "learning_rate": 2.1973092007287054e-06, "loss": 0.3432, "step": 4647 }, { "epoch": 2.161525344907766, "grad_norm": 0.3436342775821686, "learning_rate": 2.195068747606084e-06, "loss": 0.3368, "step": 4648 }, { "epoch": 2.1619903890869634, "grad_norm": 0.33585768938064575, "learning_rate": 2.1928291160022634e-06, "loss": 0.3136, "step": 4649 }, { "epoch": 2.1624554332661603, "grad_norm": 0.35160210728645325, "learning_rate": 2.190590306573198e-06, "loss": 0.3302, "step": 4650 }, { "epoch": 2.162920477445357, "grad_norm": 0.35300135612487793, "learning_rate": 2.1883523199745987e-06, "loss": 0.2987, "step": 4651 }, { "epoch": 2.163385521624554, "grad_norm": 0.4004720151424408, "learning_rate": 2.1861151568619336e-06, "loss": 0.3809, "step": 4652 }, { "epoch": 2.1638505658037515, "grad_norm": 0.3017584979534149, "learning_rate": 2.1838788178904346e-06, "loss": 0.2655, "step": 4653 }, { "epoch": 2.1643156099829484, "grad_norm": 0.3657880127429962, "learning_rate": 2.1816433037150856e-06, "loss": 0.3563, "step": 4654 }, { "epoch": 2.1647806541621453, "grad_norm": 0.362338125705719, "learning_rate": 2.179408614990635e-06, "loss": 0.3145, "step": 4655 }, { "epoch": 2.1652456983413426, "grad_norm": 0.3580748736858368, "learning_rate": 2.177174752371585e-06, "loss": 0.3113, "step": 4656 }, { "epoch": 2.1657107425205395, "grad_norm": 0.37395989894866943, "learning_rate": 2.1749417165121994e-06, "loss": 0.3297, "step": 4657 }, { "epoch": 2.1661757866997364, "grad_norm": 0.35120391845703125, "learning_rate": 2.1727095080664956e-06, "loss": 0.3417, "step": 4658 }, { "epoch": 2.1666408308789333, "grad_norm": 0.3511483371257782, "learning_rate": 2.1704781276882547e-06, "loss": 0.3232, "step": 4659 }, { "epoch": 2.1671058750581307, "grad_norm": 0.3221030533313751, "learning_rate": 2.168247576031008e-06, "loss": 0.3062, "step": 4660 }, { "epoch": 2.1675709192373276, "grad_norm": 0.3379102349281311, "learning_rate": 2.16601785374805e-06, "loss": 0.3103, "step": 4661 }, { "epoch": 2.1680359634165245, "grad_norm": 0.35525548458099365, "learning_rate": 2.163788961492429e-06, "loss": 0.3502, "step": 4662 }, { "epoch": 2.1685010075957214, "grad_norm": 0.33914363384246826, "learning_rate": 2.161560899916954e-06, "loss": 0.3336, "step": 4663 }, { "epoch": 2.1689660517749187, "grad_norm": 0.36163097620010376, "learning_rate": 2.159333669674185e-06, "loss": 0.3073, "step": 4664 }, { "epoch": 2.1694310959541157, "grad_norm": 0.34654346108436584, "learning_rate": 2.1571072714164445e-06, "loss": 0.3466, "step": 4665 }, { "epoch": 2.1698961401333126, "grad_norm": 0.33517155051231384, "learning_rate": 2.1548817057958043e-06, "loss": 0.3351, "step": 4666 }, { "epoch": 2.17036118431251, "grad_norm": 0.3178362250328064, "learning_rate": 2.152656973464101e-06, "loss": 0.2925, "step": 4667 }, { "epoch": 2.170826228491707, "grad_norm": 0.4054083824157715, "learning_rate": 2.1504330750729185e-06, "loss": 0.3176, "step": 4668 }, { "epoch": 2.1712912726709037, "grad_norm": 0.31070810556411743, "learning_rate": 2.1482100112736044e-06, "loss": 0.2867, "step": 4669 }, { "epoch": 2.1717563168501006, "grad_norm": 0.3718927800655365, "learning_rate": 2.1459877827172538e-06, "loss": 0.3633, "step": 4670 }, { "epoch": 2.172221361029298, "grad_norm": 0.36688584089279175, "learning_rate": 2.1437663900547255e-06, "loss": 0.3169, "step": 4671 }, { "epoch": 2.172686405208495, "grad_norm": 0.35190173983573914, "learning_rate": 2.141545833936625e-06, "loss": 0.3077, "step": 4672 }, { "epoch": 2.173151449387692, "grad_norm": 0.3758264482021332, "learning_rate": 2.13932611501332e-06, "loss": 0.36, "step": 4673 }, { "epoch": 2.1736164935668887, "grad_norm": 0.33575019240379333, "learning_rate": 2.1371072339349293e-06, "loss": 0.3308, "step": 4674 }, { "epoch": 2.174081537746086, "grad_norm": 0.33818864822387695, "learning_rate": 2.1348891913513293e-06, "loss": 0.3019, "step": 4675 }, { "epoch": 2.174546581925283, "grad_norm": 0.3424922227859497, "learning_rate": 2.132671987912145e-06, "loss": 0.2985, "step": 4676 }, { "epoch": 2.17501162610448, "grad_norm": 0.3274764120578766, "learning_rate": 2.130455624266762e-06, "loss": 0.3055, "step": 4677 }, { "epoch": 2.1754766702836768, "grad_norm": 0.3366675078868866, "learning_rate": 2.128240101064315e-06, "loss": 0.3189, "step": 4678 }, { "epoch": 2.175941714462874, "grad_norm": 0.38803109526634216, "learning_rate": 2.126025418953698e-06, "loss": 0.3544, "step": 4679 }, { "epoch": 2.176406758642071, "grad_norm": 0.32740333676338196, "learning_rate": 2.1238115785835512e-06, "loss": 0.3153, "step": 4680 }, { "epoch": 2.176871802821268, "grad_norm": 0.30742892622947693, "learning_rate": 2.1215985806022765e-06, "loss": 0.2642, "step": 4681 }, { "epoch": 2.177336847000465, "grad_norm": 0.3487866520881653, "learning_rate": 2.1193864256580215e-06, "loss": 0.3637, "step": 4682 }, { "epoch": 2.177801891179662, "grad_norm": 0.35300448536872864, "learning_rate": 2.117175114398694e-06, "loss": 0.3174, "step": 4683 }, { "epoch": 2.178266935358859, "grad_norm": 0.3436736762523651, "learning_rate": 2.1149646474719475e-06, "loss": 0.3213, "step": 4684 }, { "epoch": 2.178731979538056, "grad_norm": 0.32160818576812744, "learning_rate": 2.112755025525193e-06, "loss": 0.3061, "step": 4685 }, { "epoch": 2.1791970237172533, "grad_norm": 0.3675907552242279, "learning_rate": 2.110546249205597e-06, "loss": 0.3207, "step": 4686 }, { "epoch": 2.1796620678964502, "grad_norm": 0.3436359465122223, "learning_rate": 2.1083383191600676e-06, "loss": 0.3222, "step": 4687 }, { "epoch": 2.180127112075647, "grad_norm": 0.32878395915031433, "learning_rate": 2.106131236035277e-06, "loss": 0.3153, "step": 4688 }, { "epoch": 2.180592156254844, "grad_norm": 0.3562372624874115, "learning_rate": 2.1039250004776397e-06, "loss": 0.349, "step": 4689 }, { "epoch": 2.1810572004340414, "grad_norm": 0.33452579379081726, "learning_rate": 2.1017196131333306e-06, "loss": 0.3016, "step": 4690 }, { "epoch": 2.1815222446132383, "grad_norm": 0.37932857871055603, "learning_rate": 2.099515074648267e-06, "loss": 0.3182, "step": 4691 }, { "epoch": 2.181987288792435, "grad_norm": 0.35700809955596924, "learning_rate": 2.0973113856681277e-06, "loss": 0.3217, "step": 4692 }, { "epoch": 2.182452332971632, "grad_norm": 0.37079155445098877, "learning_rate": 2.0951085468383326e-06, "loss": 0.3399, "step": 4693 }, { "epoch": 2.1829173771508295, "grad_norm": 0.3523573577404022, "learning_rate": 2.0929065588040615e-06, "loss": 0.3152, "step": 4694 }, { "epoch": 2.1833824213300264, "grad_norm": 0.33730053901672363, "learning_rate": 2.0907054222102367e-06, "loss": 0.3066, "step": 4695 }, { "epoch": 2.1838474655092233, "grad_norm": 0.34836792945861816, "learning_rate": 2.088505137701538e-06, "loss": 0.3236, "step": 4696 }, { "epoch": 2.1843125096884206, "grad_norm": 0.3443335294723511, "learning_rate": 2.0863057059223923e-06, "loss": 0.2979, "step": 4697 }, { "epoch": 2.1847775538676175, "grad_norm": 0.32043859362602234, "learning_rate": 2.08410712751698e-06, "loss": 0.3132, "step": 4698 }, { "epoch": 2.1852425980468144, "grad_norm": 0.3443070948123932, "learning_rate": 2.081909403129225e-06, "loss": 0.297, "step": 4699 }, { "epoch": 2.1857076422260113, "grad_norm": 0.3612792193889618, "learning_rate": 2.079712533402808e-06, "loss": 0.3369, "step": 4700 }, { "epoch": 2.1861726864052087, "grad_norm": 0.3530242443084717, "learning_rate": 2.0775165189811534e-06, "loss": 0.2989, "step": 4701 }, { "epoch": 2.1866377305844056, "grad_norm": 0.33956217765808105, "learning_rate": 2.0753213605074424e-06, "loss": 0.3195, "step": 4702 }, { "epoch": 2.1871027747636025, "grad_norm": 0.3230421245098114, "learning_rate": 2.0731270586245972e-06, "loss": 0.3039, "step": 4703 }, { "epoch": 2.1875678189427994, "grad_norm": 0.41725921630859375, "learning_rate": 2.070933613975296e-06, "loss": 0.3244, "step": 4704 }, { "epoch": 2.1880328631219967, "grad_norm": 0.33200743794441223, "learning_rate": 2.068741027201961e-06, "loss": 0.3153, "step": 4705 }, { "epoch": 2.1884979073011936, "grad_norm": 0.3534869849681854, "learning_rate": 2.066549298946767e-06, "loss": 0.344, "step": 4706 }, { "epoch": 2.1889629514803906, "grad_norm": 0.308760404586792, "learning_rate": 2.064358429851634e-06, "loss": 0.2962, "step": 4707 }, { "epoch": 2.1894279956595875, "grad_norm": 0.36021149158477783, "learning_rate": 2.062168420558232e-06, "loss": 0.3497, "step": 4708 }, { "epoch": 2.189893039838785, "grad_norm": 0.3680166006088257, "learning_rate": 2.0599792717079807e-06, "loss": 0.3084, "step": 4709 }, { "epoch": 2.1903580840179817, "grad_norm": 0.3480318486690521, "learning_rate": 2.0577909839420468e-06, "loss": 0.3213, "step": 4710 }, { "epoch": 2.1908231281971786, "grad_norm": 0.33287766575813293, "learning_rate": 2.0556035579013417e-06, "loss": 0.2899, "step": 4711 }, { "epoch": 2.1912881723763755, "grad_norm": 0.36140042543411255, "learning_rate": 2.0534169942265298e-06, "loss": 0.3306, "step": 4712 }, { "epoch": 2.191753216555573, "grad_norm": 0.3293544352054596, "learning_rate": 2.0512312935580163e-06, "loss": 0.2999, "step": 4713 }, { "epoch": 2.1922182607347698, "grad_norm": 0.3596755266189575, "learning_rate": 2.0490464565359615e-06, "loss": 0.3341, "step": 4714 }, { "epoch": 2.1926833049139667, "grad_norm": 0.3495473563671112, "learning_rate": 2.0468624838002647e-06, "loss": 0.3237, "step": 4715 }, { "epoch": 2.193148349093164, "grad_norm": 0.3244265615940094, "learning_rate": 2.044679375990581e-06, "loss": 0.3232, "step": 4716 }, { "epoch": 2.193613393272361, "grad_norm": 0.34927719831466675, "learning_rate": 2.0424971337463017e-06, "loss": 0.3305, "step": 4717 }, { "epoch": 2.194078437451558, "grad_norm": 0.3691467046737671, "learning_rate": 2.0403157577065746e-06, "loss": 0.3485, "step": 4718 }, { "epoch": 2.1945434816307547, "grad_norm": 0.3577490448951721, "learning_rate": 2.0381352485102857e-06, "loss": 0.3291, "step": 4719 }, { "epoch": 2.195008525809952, "grad_norm": 0.344149649143219, "learning_rate": 2.0359556067960727e-06, "loss": 0.3146, "step": 4720 }, { "epoch": 2.195473569989149, "grad_norm": 0.33729931712150574, "learning_rate": 2.0337768332023185e-06, "loss": 0.2903, "step": 4721 }, { "epoch": 2.195938614168346, "grad_norm": 0.36155104637145996, "learning_rate": 2.0315989283671474e-06, "loss": 0.3305, "step": 4722 }, { "epoch": 2.196403658347543, "grad_norm": 0.3909613788127899, "learning_rate": 2.029421892928436e-06, "loss": 0.3343, "step": 4723 }, { "epoch": 2.19686870252674, "grad_norm": 0.3571639358997345, "learning_rate": 2.027245727523798e-06, "loss": 0.2831, "step": 4724 }, { "epoch": 2.197333746705937, "grad_norm": 0.3750656247138977, "learning_rate": 2.0250704327906025e-06, "loss": 0.3303, "step": 4725 }, { "epoch": 2.197798790885134, "grad_norm": 0.35987409949302673, "learning_rate": 2.022896009365952e-06, "loss": 0.3574, "step": 4726 }, { "epoch": 2.1982638350643313, "grad_norm": 0.3430827558040619, "learning_rate": 2.020722457886705e-06, "loss": 0.3216, "step": 4727 }, { "epoch": 2.1987288792435282, "grad_norm": 0.3294491171836853, "learning_rate": 2.018549778989456e-06, "loss": 0.3249, "step": 4728 }, { "epoch": 2.199193923422725, "grad_norm": 0.39135822653770447, "learning_rate": 2.0163779733105497e-06, "loss": 0.3184, "step": 4729 }, { "epoch": 2.199658967601922, "grad_norm": 0.3750271201133728, "learning_rate": 2.0142070414860704e-06, "loss": 0.3288, "step": 4730 }, { "epoch": 2.2001240117811194, "grad_norm": 0.35246542096138, "learning_rate": 2.0120369841518496e-06, "loss": 0.3001, "step": 4731 }, { "epoch": 2.2005890559603163, "grad_norm": 0.3286935091018677, "learning_rate": 2.009867801943462e-06, "loss": 0.331, "step": 4732 }, { "epoch": 2.201054100139513, "grad_norm": 0.37689733505249023, "learning_rate": 2.007699495496228e-06, "loss": 0.3225, "step": 4733 }, { "epoch": 2.20151914431871, "grad_norm": 0.358782559633255, "learning_rate": 2.0055320654452055e-06, "loss": 0.3297, "step": 4734 }, { "epoch": 2.2019841884979074, "grad_norm": 0.33447474241256714, "learning_rate": 2.0033655124252033e-06, "loss": 0.3196, "step": 4735 }, { "epoch": 2.2024492326771044, "grad_norm": 0.36790183186531067, "learning_rate": 2.001199837070766e-06, "loss": 0.2924, "step": 4736 }, { "epoch": 2.2029142768563013, "grad_norm": 0.384657084941864, "learning_rate": 1.999035040016188e-06, "loss": 0.355, "step": 4737 }, { "epoch": 2.2033793210354986, "grad_norm": 0.32145190238952637, "learning_rate": 1.9968711218954994e-06, "loss": 0.3055, "step": 4738 }, { "epoch": 2.2038443652146955, "grad_norm": 0.34638088941574097, "learning_rate": 1.9947080833424816e-06, "loss": 0.3214, "step": 4739 }, { "epoch": 2.2043094093938924, "grad_norm": 0.33720049262046814, "learning_rate": 1.9925459249906488e-06, "loss": 0.3181, "step": 4740 }, { "epoch": 2.2047744535730893, "grad_norm": 0.3274615705013275, "learning_rate": 1.990384647473265e-06, "loss": 0.3015, "step": 4741 }, { "epoch": 2.2052394977522867, "grad_norm": 0.3554820716381073, "learning_rate": 1.9882242514233313e-06, "loss": 0.3141, "step": 4742 }, { "epoch": 2.2057045419314836, "grad_norm": 0.35325977206230164, "learning_rate": 1.9860647374735937e-06, "loss": 0.309, "step": 4743 }, { "epoch": 2.2061695861106805, "grad_norm": 0.3346039652824402, "learning_rate": 1.9839061062565384e-06, "loss": 0.2945, "step": 4744 }, { "epoch": 2.2066346302898774, "grad_norm": 0.3343028426170349, "learning_rate": 1.9817483584043954e-06, "loss": 0.3328, "step": 4745 }, { "epoch": 2.2070996744690747, "grad_norm": 0.33999499678611755, "learning_rate": 1.9795914945491305e-06, "loss": 0.323, "step": 4746 }, { "epoch": 2.2075647186482716, "grad_norm": 0.32321181893348694, "learning_rate": 1.977435515322458e-06, "loss": 0.2882, "step": 4747 }, { "epoch": 2.2080297628274685, "grad_norm": 0.38171252608299255, "learning_rate": 1.9752804213558254e-06, "loss": 0.3073, "step": 4748 }, { "epoch": 2.2084948070066655, "grad_norm": 0.37734076380729675, "learning_rate": 1.9731262132804275e-06, "loss": 0.3522, "step": 4749 }, { "epoch": 2.208959851185863, "grad_norm": 0.34129461646080017, "learning_rate": 1.970972891727194e-06, "loss": 0.3072, "step": 4750 }, { "epoch": 2.2094248953650597, "grad_norm": 0.36008572578430176, "learning_rate": 1.9688204573268015e-06, "loss": 0.3233, "step": 4751 }, { "epoch": 2.2098899395442566, "grad_norm": 0.3995450735092163, "learning_rate": 1.9666689107096597e-06, "loss": 0.3445, "step": 4752 }, { "epoch": 2.2103549837234535, "grad_norm": 0.3434513211250305, "learning_rate": 1.964518252505925e-06, "loss": 0.3023, "step": 4753 }, { "epoch": 2.210820027902651, "grad_norm": 0.31967639923095703, "learning_rate": 1.962368483345486e-06, "loss": 0.3327, "step": 4754 }, { "epoch": 2.2112850720818478, "grad_norm": 0.30786556005477905, "learning_rate": 1.9602196038579774e-06, "loss": 0.3022, "step": 4755 }, { "epoch": 2.2117501162610447, "grad_norm": 0.3452177345752716, "learning_rate": 1.9580716146727734e-06, "loss": 0.3256, "step": 4756 }, { "epoch": 2.212215160440242, "grad_norm": 0.33406537771224976, "learning_rate": 1.9559245164189812e-06, "loss": 0.3443, "step": 4757 }, { "epoch": 2.212680204619439, "grad_norm": 0.31842756271362305, "learning_rate": 1.9537783097254543e-06, "loss": 0.3247, "step": 4758 }, { "epoch": 2.213145248798636, "grad_norm": 0.3189336955547333, "learning_rate": 1.9516329952207787e-06, "loss": 0.3472, "step": 4759 }, { "epoch": 2.2136102929778327, "grad_norm": 0.3681834638118744, "learning_rate": 1.949488573533285e-06, "loss": 0.3135, "step": 4760 }, { "epoch": 2.21407533715703, "grad_norm": 0.3542393445968628, "learning_rate": 1.9473450452910365e-06, "loss": 0.3136, "step": 4761 }, { "epoch": 2.214540381336227, "grad_norm": 0.3548336327075958, "learning_rate": 1.9452024111218414e-06, "loss": 0.3259, "step": 4762 }, { "epoch": 2.215005425515424, "grad_norm": 0.34949004650115967, "learning_rate": 1.9430606716532393e-06, "loss": 0.331, "step": 4763 }, { "epoch": 2.215470469694621, "grad_norm": 0.3351438641548157, "learning_rate": 1.940919827512513e-06, "loss": 0.2809, "step": 4764 }, { "epoch": 2.215935513873818, "grad_norm": 0.3751305639743805, "learning_rate": 1.938779879326679e-06, "loss": 0.3407, "step": 4765 }, { "epoch": 2.216400558053015, "grad_norm": 0.3458346724510193, "learning_rate": 1.936640827722494e-06, "loss": 0.3044, "step": 4766 }, { "epoch": 2.216865602232212, "grad_norm": 0.31323379278182983, "learning_rate": 1.934502673326452e-06, "loss": 0.2983, "step": 4767 }, { "epoch": 2.2173306464114093, "grad_norm": 0.35735267400741577, "learning_rate": 1.9323654167647854e-06, "loss": 0.3316, "step": 4768 }, { "epoch": 2.217795690590606, "grad_norm": 0.3447880744934082, "learning_rate": 1.930229058663459e-06, "loss": 0.2798, "step": 4769 }, { "epoch": 2.218260734769803, "grad_norm": 0.3700970709323883, "learning_rate": 1.9280935996481792e-06, "loss": 0.319, "step": 4770 }, { "epoch": 2.218725778949, "grad_norm": 0.34711953997612, "learning_rate": 1.9259590403443857e-06, "loss": 0.3449, "step": 4771 }, { "epoch": 2.2191908231281974, "grad_norm": 0.3344910144805908, "learning_rate": 1.923825381377259e-06, "loss": 0.2688, "step": 4772 }, { "epoch": 2.2196558673073943, "grad_norm": 0.38596683740615845, "learning_rate": 1.9216926233717087e-06, "loss": 0.3415, "step": 4773 }, { "epoch": 2.220120911486591, "grad_norm": 0.36054715514183044, "learning_rate": 1.9195607669523903e-06, "loss": 0.2957, "step": 4774 }, { "epoch": 2.220585955665788, "grad_norm": 0.33113333582878113, "learning_rate": 1.9174298127436845e-06, "loss": 0.3162, "step": 4775 }, { "epoch": 2.2210509998449854, "grad_norm": 0.3493165373802185, "learning_rate": 1.9152997613697184e-06, "loss": 0.3168, "step": 4776 }, { "epoch": 2.2215160440241823, "grad_norm": 0.37465736269950867, "learning_rate": 1.913170613454345e-06, "loss": 0.351, "step": 4777 }, { "epoch": 2.2219810882033793, "grad_norm": 0.3411155045032501, "learning_rate": 1.9110423696211588e-06, "loss": 0.3113, "step": 4778 }, { "epoch": 2.222446132382576, "grad_norm": 0.35005953907966614, "learning_rate": 1.9089150304934883e-06, "loss": 0.3244, "step": 4779 }, { "epoch": 2.2229111765617735, "grad_norm": 0.3763487935066223, "learning_rate": 1.9067885966943983e-06, "loss": 0.3509, "step": 4780 }, { "epoch": 2.2233762207409704, "grad_norm": 0.3517736792564392, "learning_rate": 1.9046630688466827e-06, "loss": 0.3222, "step": 4781 }, { "epoch": 2.2238412649201673, "grad_norm": 0.3561932146549225, "learning_rate": 1.9025384475728787e-06, "loss": 0.3047, "step": 4782 }, { "epoch": 2.224306309099364, "grad_norm": 0.3545885980129242, "learning_rate": 1.9004147334952483e-06, "loss": 0.3192, "step": 4783 }, { "epoch": 2.2247713532785616, "grad_norm": 0.3312489092350006, "learning_rate": 1.8982919272357974e-06, "loss": 0.3082, "step": 4784 }, { "epoch": 2.2252363974577585, "grad_norm": 0.36054548621177673, "learning_rate": 1.8961700294162578e-06, "loss": 0.3275, "step": 4785 }, { "epoch": 2.2257014416369554, "grad_norm": 0.350093811750412, "learning_rate": 1.8940490406581018e-06, "loss": 0.2902, "step": 4786 }, { "epoch": 2.2261664858161527, "grad_norm": 0.34745752811431885, "learning_rate": 1.8919289615825304e-06, "loss": 0.3468, "step": 4787 }, { "epoch": 2.2266315299953496, "grad_norm": 0.3414476811885834, "learning_rate": 1.8898097928104825e-06, "loss": 0.2941, "step": 4788 }, { "epoch": 2.2270965741745465, "grad_norm": 0.3686240315437317, "learning_rate": 1.8876915349626258e-06, "loss": 0.3379, "step": 4789 }, { "epoch": 2.2275616183537434, "grad_norm": 0.3241952657699585, "learning_rate": 1.8855741886593643e-06, "loss": 0.3107, "step": 4790 }, { "epoch": 2.228026662532941, "grad_norm": 0.3654305934906006, "learning_rate": 1.883457754520835e-06, "loss": 0.3199, "step": 4791 }, { "epoch": 2.2284917067121377, "grad_norm": 0.3426763415336609, "learning_rate": 1.8813422331669084e-06, "loss": 0.3018, "step": 4792 }, { "epoch": 2.2289567508913346, "grad_norm": 0.39639508724212646, "learning_rate": 1.8792276252171855e-06, "loss": 0.3529, "step": 4793 }, { "epoch": 2.2294217950705315, "grad_norm": 0.3361477553844452, "learning_rate": 1.8771139312909976e-06, "loss": 0.2977, "step": 4794 }, { "epoch": 2.229886839249729, "grad_norm": 0.3340165913105011, "learning_rate": 1.8750011520074158e-06, "loss": 0.2923, "step": 4795 }, { "epoch": 2.2303518834289258, "grad_norm": 0.3512633442878723, "learning_rate": 1.8728892879852345e-06, "loss": 0.3543, "step": 4796 }, { "epoch": 2.2308169276081227, "grad_norm": 0.35980337858200073, "learning_rate": 1.870778339842989e-06, "loss": 0.3258, "step": 4797 }, { "epoch": 2.23128197178732, "grad_norm": 0.3445799946784973, "learning_rate": 1.8686683081989371e-06, "loss": 0.3035, "step": 4798 }, { "epoch": 2.231747015966517, "grad_norm": 0.3666563630104065, "learning_rate": 1.866559193671077e-06, "loss": 0.2764, "step": 4799 }, { "epoch": 2.232212060145714, "grad_norm": 0.36018139123916626, "learning_rate": 1.8644509968771302e-06, "loss": 0.3334, "step": 4800 }, { "epoch": 2.2326771043249107, "grad_norm": 0.3475804626941681, "learning_rate": 1.8623437184345556e-06, "loss": 0.34, "step": 4801 }, { "epoch": 2.233142148504108, "grad_norm": 0.33812299370765686, "learning_rate": 1.86023735896054e-06, "loss": 0.3183, "step": 4802 }, { "epoch": 2.233607192683305, "grad_norm": 0.3337913453578949, "learning_rate": 1.8581319190720038e-06, "loss": 0.3004, "step": 4803 }, { "epoch": 2.234072236862502, "grad_norm": 0.3546447157859802, "learning_rate": 1.8560273993855938e-06, "loss": 0.3009, "step": 4804 }, { "epoch": 2.234537281041699, "grad_norm": 0.37179654836654663, "learning_rate": 1.8539238005176912e-06, "loss": 0.3277, "step": 4805 }, { "epoch": 2.235002325220896, "grad_norm": 0.36469766497612, "learning_rate": 1.8518211230844042e-06, "loss": 0.3292, "step": 4806 }, { "epoch": 2.235467369400093, "grad_norm": 0.3361158072948456, "learning_rate": 1.849719367701575e-06, "loss": 0.3354, "step": 4807 }, { "epoch": 2.23593241357929, "grad_norm": 0.3247022330760956, "learning_rate": 1.8476185349847713e-06, "loss": 0.3196, "step": 4808 }, { "epoch": 2.236397457758487, "grad_norm": 0.36382511258125305, "learning_rate": 1.8455186255492956e-06, "loss": 0.342, "step": 4809 }, { "epoch": 2.236862501937684, "grad_norm": 0.33687886595726013, "learning_rate": 1.8434196400101744e-06, "loss": 0.2892, "step": 4810 }, { "epoch": 2.237327546116881, "grad_norm": 0.3983268737792969, "learning_rate": 1.8413215789821692e-06, "loss": 0.3366, "step": 4811 }, { "epoch": 2.237792590296078, "grad_norm": 0.37651053071022034, "learning_rate": 1.839224443079765e-06, "loss": 0.35, "step": 4812 }, { "epoch": 2.238257634475275, "grad_norm": 0.33488836884498596, "learning_rate": 1.8371282329171803e-06, "loss": 0.2801, "step": 4813 }, { "epoch": 2.2387226786544723, "grad_norm": 0.341766357421875, "learning_rate": 1.8350329491083613e-06, "loss": 0.3194, "step": 4814 }, { "epoch": 2.239187722833669, "grad_norm": 0.3747437596321106, "learning_rate": 1.832938592266984e-06, "loss": 0.3123, "step": 4815 }, { "epoch": 2.239652767012866, "grad_norm": 0.351366251707077, "learning_rate": 1.8308451630064484e-06, "loss": 0.309, "step": 4816 }, { "epoch": 2.2401178111920634, "grad_norm": 0.40906503796577454, "learning_rate": 1.8287526619398888e-06, "loss": 0.3532, "step": 4817 }, { "epoch": 2.2405828553712603, "grad_norm": 0.3506213426589966, "learning_rate": 1.8266610896801624e-06, "loss": 0.3236, "step": 4818 }, { "epoch": 2.2410478995504572, "grad_norm": 0.3513505756855011, "learning_rate": 1.824570446839859e-06, "loss": 0.3113, "step": 4819 }, { "epoch": 2.241512943729654, "grad_norm": 0.33928313851356506, "learning_rate": 1.8224807340312912e-06, "loss": 0.3216, "step": 4820 }, { "epoch": 2.2419779879088515, "grad_norm": 0.3162320852279663, "learning_rate": 1.8203919518665049e-06, "loss": 0.3097, "step": 4821 }, { "epoch": 2.2424430320880484, "grad_norm": 0.36044222116470337, "learning_rate": 1.8183041009572678e-06, "loss": 0.33, "step": 4822 }, { "epoch": 2.2429080762672453, "grad_norm": 0.4068707525730133, "learning_rate": 1.8162171819150798e-06, "loss": 0.3338, "step": 4823 }, { "epoch": 2.243373120446442, "grad_norm": 0.3441452383995056, "learning_rate": 1.8141311953511637e-06, "loss": 0.3387, "step": 4824 }, { "epoch": 2.2438381646256396, "grad_norm": 0.33748528361320496, "learning_rate": 1.8120461418764711e-06, "loss": 0.3243, "step": 4825 }, { "epoch": 2.2443032088048365, "grad_norm": 0.3908786475658417, "learning_rate": 1.8099620221016818e-06, "loss": 0.3282, "step": 4826 }, { "epoch": 2.2447682529840334, "grad_norm": 0.3668133616447449, "learning_rate": 1.8078788366372008e-06, "loss": 0.2975, "step": 4827 }, { "epoch": 2.2452332971632307, "grad_norm": 0.3833257257938385, "learning_rate": 1.8057965860931593e-06, "loss": 0.312, "step": 4828 }, { "epoch": 2.2456983413424276, "grad_norm": 0.3340941369533539, "learning_rate": 1.8037152710794115e-06, "loss": 0.3094, "step": 4829 }, { "epoch": 2.2461633855216245, "grad_norm": 0.3409848213195801, "learning_rate": 1.8016348922055448e-06, "loss": 0.2966, "step": 4830 }, { "epoch": 2.2466284297008214, "grad_norm": 0.3662347197532654, "learning_rate": 1.7995554500808655e-06, "loss": 0.3617, "step": 4831 }, { "epoch": 2.247093473880019, "grad_norm": 0.4032919406890869, "learning_rate": 1.7974769453144102e-06, "loss": 0.3042, "step": 4832 }, { "epoch": 2.2475585180592157, "grad_norm": 0.3917955160140991, "learning_rate": 1.7953993785149377e-06, "loss": 0.3188, "step": 4833 }, { "epoch": 2.2480235622384126, "grad_norm": 0.36748558282852173, "learning_rate": 1.7933227502909361e-06, "loss": 0.3355, "step": 4834 }, { "epoch": 2.2484886064176095, "grad_norm": 0.36093080043792725, "learning_rate": 1.7912470612506123e-06, "loss": 0.3389, "step": 4835 }, { "epoch": 2.248953650596807, "grad_norm": 0.34391847252845764, "learning_rate": 1.7891723120019038e-06, "loss": 0.3463, "step": 4836 }, { "epoch": 2.2494186947760038, "grad_norm": 0.3272242844104767, "learning_rate": 1.7870985031524718e-06, "loss": 0.3093, "step": 4837 }, { "epoch": 2.2498837389552007, "grad_norm": 0.3449787199497223, "learning_rate": 1.7850256353097017e-06, "loss": 0.3272, "step": 4838 }, { "epoch": 2.250348783134398, "grad_norm": 0.36595356464385986, "learning_rate": 1.7829537090807002e-06, "loss": 0.325, "step": 4839 }, { "epoch": 2.250813827313595, "grad_norm": 0.35330867767333984, "learning_rate": 1.7808827250723043e-06, "loss": 0.3626, "step": 4840 }, { "epoch": 2.251278871492792, "grad_norm": 0.3324193060398102, "learning_rate": 1.7788126838910674e-06, "loss": 0.3043, "step": 4841 }, { "epoch": 2.2517439156719887, "grad_norm": 0.36338382959365845, "learning_rate": 1.7767435861432752e-06, "loss": 0.3228, "step": 4842 }, { "epoch": 2.2522089598511856, "grad_norm": 0.3679875135421753, "learning_rate": 1.7746754324349291e-06, "loss": 0.366, "step": 4843 }, { "epoch": 2.252674004030383, "grad_norm": 0.3294106423854828, "learning_rate": 1.7726082233717607e-06, "loss": 0.3195, "step": 4844 }, { "epoch": 2.25313904820958, "grad_norm": 0.3595585227012634, "learning_rate": 1.7705419595592193e-06, "loss": 0.3553, "step": 4845 }, { "epoch": 2.253604092388777, "grad_norm": 0.34528473019599915, "learning_rate": 1.7684766416024828e-06, "loss": 0.302, "step": 4846 }, { "epoch": 2.254069136567974, "grad_norm": 0.38927531242370605, "learning_rate": 1.7664122701064462e-06, "loss": 0.3594, "step": 4847 }, { "epoch": 2.254534180747171, "grad_norm": 0.30416175723075867, "learning_rate": 1.7643488456757324e-06, "loss": 0.3064, "step": 4848 }, { "epoch": 2.254999224926368, "grad_norm": 0.37094974517822266, "learning_rate": 1.7622863689146841e-06, "loss": 0.3446, "step": 4849 }, { "epoch": 2.255464269105565, "grad_norm": 0.31221383810043335, "learning_rate": 1.7602248404273692e-06, "loss": 0.2903, "step": 4850 }, { "epoch": 2.255929313284762, "grad_norm": 0.33237534761428833, "learning_rate": 1.758164260817573e-06, "loss": 0.3272, "step": 4851 }, { "epoch": 2.256394357463959, "grad_norm": 0.3221777677536011, "learning_rate": 1.7561046306888092e-06, "loss": 0.3238, "step": 4852 }, { "epoch": 2.256859401643156, "grad_norm": 0.3504292964935303, "learning_rate": 1.7540459506443052e-06, "loss": 0.3285, "step": 4853 }, { "epoch": 2.257324445822353, "grad_norm": 0.3311966359615326, "learning_rate": 1.7519882212870204e-06, "loss": 0.3255, "step": 4854 }, { "epoch": 2.2577894900015503, "grad_norm": 0.36982831358909607, "learning_rate": 1.7499314432196257e-06, "loss": 0.3495, "step": 4855 }, { "epoch": 2.258254534180747, "grad_norm": 0.35521289706230164, "learning_rate": 1.747875617044521e-06, "loss": 0.3106, "step": 4856 }, { "epoch": 2.258719578359944, "grad_norm": 0.3456581234931946, "learning_rate": 1.7458207433638225e-06, "loss": 0.3315, "step": 4857 }, { "epoch": 2.2591846225391414, "grad_norm": 0.3502543270587921, "learning_rate": 1.7437668227793714e-06, "loss": 0.308, "step": 4858 }, { "epoch": 2.2596496667183383, "grad_norm": 0.3269558846950531, "learning_rate": 1.7417138558927244e-06, "loss": 0.2988, "step": 4859 }, { "epoch": 2.2601147108975352, "grad_norm": 0.3678964376449585, "learning_rate": 1.7396618433051648e-06, "loss": 0.336, "step": 4860 }, { "epoch": 2.260579755076732, "grad_norm": 0.3400316834449768, "learning_rate": 1.7376107856176928e-06, "loss": 0.3004, "step": 4861 }, { "epoch": 2.2610447992559295, "grad_norm": 0.35273489356040955, "learning_rate": 1.7355606834310317e-06, "loss": 0.3352, "step": 4862 }, { "epoch": 2.2615098434351264, "grad_norm": 0.35708242654800415, "learning_rate": 1.7335115373456202e-06, "loss": 0.3263, "step": 4863 }, { "epoch": 2.2619748876143233, "grad_norm": 0.31572067737579346, "learning_rate": 1.7314633479616227e-06, "loss": 0.2921, "step": 4864 }, { "epoch": 2.26243993179352, "grad_norm": 0.3223276436328888, "learning_rate": 1.7294161158789197e-06, "loss": 0.3239, "step": 4865 }, { "epoch": 2.2629049759727176, "grad_norm": 0.32633039355278015, "learning_rate": 1.7273698416971095e-06, "loss": 0.3272, "step": 4866 }, { "epoch": 2.2633700201519145, "grad_norm": 0.3426065444946289, "learning_rate": 1.725324526015517e-06, "loss": 0.3702, "step": 4867 }, { "epoch": 2.2638350643311114, "grad_norm": 0.33616894483566284, "learning_rate": 1.723280169433178e-06, "loss": 0.3157, "step": 4868 }, { "epoch": 2.2643001085103087, "grad_norm": 0.3157622516155243, "learning_rate": 1.7212367725488544e-06, "loss": 0.282, "step": 4869 }, { "epoch": 2.2647651526895056, "grad_norm": 0.3689837157726288, "learning_rate": 1.7191943359610214e-06, "loss": 0.3646, "step": 4870 }, { "epoch": 2.2652301968687025, "grad_norm": 0.337637722492218, "learning_rate": 1.7171528602678767e-06, "loss": 0.3203, "step": 4871 }, { "epoch": 2.2656952410478994, "grad_norm": 0.34560054540634155, "learning_rate": 1.7151123460673353e-06, "loss": 0.3025, "step": 4872 }, { "epoch": 2.2661602852270963, "grad_norm": 0.3452926576137543, "learning_rate": 1.7130727939570325e-06, "loss": 0.3539, "step": 4873 }, { "epoch": 2.2666253294062937, "grad_norm": 0.3485986888408661, "learning_rate": 1.7110342045343164e-06, "loss": 0.3004, "step": 4874 }, { "epoch": 2.2670903735854906, "grad_norm": 0.3492816984653473, "learning_rate": 1.7089965783962608e-06, "loss": 0.2975, "step": 4875 }, { "epoch": 2.2675554177646875, "grad_norm": 0.3906685709953308, "learning_rate": 1.7069599161396488e-06, "loss": 0.3477, "step": 4876 }, { "epoch": 2.268020461943885, "grad_norm": 0.35646331310272217, "learning_rate": 1.70492421836099e-06, "loss": 0.3069, "step": 4877 }, { "epoch": 2.2684855061230818, "grad_norm": 0.35431551933288574, "learning_rate": 1.7028894856565036e-06, "loss": 0.305, "step": 4878 }, { "epoch": 2.2689505503022787, "grad_norm": 0.3560280501842499, "learning_rate": 1.700855718622133e-06, "loss": 0.3439, "step": 4879 }, { "epoch": 2.2694155944814756, "grad_norm": 0.3366261124610901, "learning_rate": 1.698822917853532e-06, "loss": 0.3093, "step": 4880 }, { "epoch": 2.269880638660673, "grad_norm": 0.3454442322254181, "learning_rate": 1.6967910839460788e-06, "loss": 0.3045, "step": 4881 }, { "epoch": 2.27034568283987, "grad_norm": 0.360445499420166, "learning_rate": 1.6947602174948609e-06, "loss": 0.3238, "step": 4882 }, { "epoch": 2.2708107270190667, "grad_norm": 0.342626690864563, "learning_rate": 1.6927303190946876e-06, "loss": 0.3257, "step": 4883 }, { "epoch": 2.2712757711982636, "grad_norm": 0.3575427532196045, "learning_rate": 1.6907013893400838e-06, "loss": 0.334, "step": 4884 }, { "epoch": 2.271740815377461, "grad_norm": 0.3537481427192688, "learning_rate": 1.6886734288252904e-06, "loss": 0.3557, "step": 4885 }, { "epoch": 2.272205859556658, "grad_norm": 0.32836195826530457, "learning_rate": 1.6866464381442622e-06, "loss": 0.3094, "step": 4886 }, { "epoch": 2.272670903735855, "grad_norm": 0.3523426651954651, "learning_rate": 1.6846204178906744e-06, "loss": 0.3144, "step": 4887 }, { "epoch": 2.273135947915052, "grad_norm": 0.35294294357299805, "learning_rate": 1.6825953686579126e-06, "loss": 0.3225, "step": 4888 }, { "epoch": 2.273600992094249, "grad_norm": 0.33541440963745117, "learning_rate": 1.6805712910390836e-06, "loss": 0.3106, "step": 4889 }, { "epoch": 2.274066036273446, "grad_norm": 0.3566812574863434, "learning_rate": 1.6785481856270042e-06, "loss": 0.3216, "step": 4890 }, { "epoch": 2.274531080452643, "grad_norm": 0.3586536645889282, "learning_rate": 1.6765260530142114e-06, "loss": 0.3015, "step": 4891 }, { "epoch": 2.27499612463184, "grad_norm": 0.31585457921028137, "learning_rate": 1.6745048937929525e-06, "loss": 0.295, "step": 4892 }, { "epoch": 2.275461168811037, "grad_norm": 0.36500096321105957, "learning_rate": 1.6724847085551955e-06, "loss": 0.3317, "step": 4893 }, { "epoch": 2.275926212990234, "grad_norm": 0.3432101309299469, "learning_rate": 1.6704654978926167e-06, "loss": 0.3005, "step": 4894 }, { "epoch": 2.276391257169431, "grad_norm": 0.34062010049819946, "learning_rate": 1.6684472623966108e-06, "loss": 0.2974, "step": 4895 }, { "epoch": 2.2768563013486283, "grad_norm": 0.33995094895362854, "learning_rate": 1.666430002658287e-06, "loss": 0.3219, "step": 4896 }, { "epoch": 2.277321345527825, "grad_norm": 0.32602110505104065, "learning_rate": 1.6644137192684694e-06, "loss": 0.3365, "step": 4897 }, { "epoch": 2.277786389707022, "grad_norm": 0.3418422341346741, "learning_rate": 1.6623984128176912e-06, "loss": 0.3219, "step": 4898 }, { "epoch": 2.2782514338862194, "grad_norm": 0.3250005841255188, "learning_rate": 1.6603840838962066e-06, "loss": 0.3283, "step": 4899 }, { "epoch": 2.2787164780654163, "grad_norm": 0.366692453622818, "learning_rate": 1.6583707330939774e-06, "loss": 0.3365, "step": 4900 }, { "epoch": 2.2791815222446132, "grad_norm": 0.29823222756385803, "learning_rate": 1.6563583610006806e-06, "loss": 0.3037, "step": 4901 }, { "epoch": 2.27964656642381, "grad_norm": 0.36057764291763306, "learning_rate": 1.6543469682057105e-06, "loss": 0.3333, "step": 4902 }, { "epoch": 2.280111610603007, "grad_norm": 0.34210824966430664, "learning_rate": 1.6523365552981674e-06, "loss": 0.3378, "step": 4903 }, { "epoch": 2.2805766547822044, "grad_norm": 0.3876771926879883, "learning_rate": 1.6503271228668726e-06, "loss": 0.3132, "step": 4904 }, { "epoch": 2.2810416989614013, "grad_norm": 0.36838680505752563, "learning_rate": 1.6483186715003523e-06, "loss": 0.3283, "step": 4905 }, { "epoch": 2.281506743140598, "grad_norm": 0.32702258229255676, "learning_rate": 1.6463112017868516e-06, "loss": 0.3193, "step": 4906 }, { "epoch": 2.2819717873197956, "grad_norm": 0.33795708417892456, "learning_rate": 1.6443047143143248e-06, "loss": 0.3234, "step": 4907 }, { "epoch": 2.2824368314989925, "grad_norm": 0.3263553977012634, "learning_rate": 1.6422992096704422e-06, "loss": 0.2925, "step": 4908 }, { "epoch": 2.2829018756781894, "grad_norm": 0.38260510563850403, "learning_rate": 1.6402946884425796e-06, "loss": 0.3497, "step": 4909 }, { "epoch": 2.2833669198573863, "grad_norm": 0.38565513491630554, "learning_rate": 1.6382911512178323e-06, "loss": 0.32, "step": 4910 }, { "epoch": 2.2838319640365836, "grad_norm": 0.3692467212677002, "learning_rate": 1.6362885985830001e-06, "loss": 0.3332, "step": 4911 }, { "epoch": 2.2842970082157805, "grad_norm": 0.3257414996623993, "learning_rate": 1.6342870311246024e-06, "loss": 0.3331, "step": 4912 }, { "epoch": 2.2847620523949774, "grad_norm": 0.3339729607105255, "learning_rate": 1.6322864494288616e-06, "loss": 0.3091, "step": 4913 }, { "epoch": 2.2852270965741743, "grad_norm": 0.32545122504234314, "learning_rate": 1.6302868540817184e-06, "loss": 0.3183, "step": 4914 }, { "epoch": 2.2856921407533717, "grad_norm": 0.33002758026123047, "learning_rate": 1.6282882456688197e-06, "loss": 0.3126, "step": 4915 }, { "epoch": 2.2861571849325686, "grad_norm": 0.3634626865386963, "learning_rate": 1.6262906247755284e-06, "loss": 0.3097, "step": 4916 }, { "epoch": 2.2866222291117655, "grad_norm": 0.33301109075546265, "learning_rate": 1.6242939919869117e-06, "loss": 0.3269, "step": 4917 }, { "epoch": 2.287087273290963, "grad_norm": 0.3408260941505432, "learning_rate": 1.6222983478877525e-06, "loss": 0.3362, "step": 4918 }, { "epoch": 2.2875523174701597, "grad_norm": 0.35351526737213135, "learning_rate": 1.6203036930625427e-06, "loss": 0.3216, "step": 4919 }, { "epoch": 2.2880173616493567, "grad_norm": 0.3414493799209595, "learning_rate": 1.618310028095486e-06, "loss": 0.356, "step": 4920 }, { "epoch": 2.2884824058285536, "grad_norm": 0.3114349842071533, "learning_rate": 1.6163173535704913e-06, "loss": 0.3032, "step": 4921 }, { "epoch": 2.288947450007751, "grad_norm": 0.3438819944858551, "learning_rate": 1.6143256700711835e-06, "loss": 0.3043, "step": 4922 }, { "epoch": 2.289412494186948, "grad_norm": 0.35198479890823364, "learning_rate": 1.6123349781808911e-06, "loss": 0.3026, "step": 4923 }, { "epoch": 2.2898775383661447, "grad_norm": 0.339100182056427, "learning_rate": 1.610345278482659e-06, "loss": 0.3188, "step": 4924 }, { "epoch": 2.2903425825453416, "grad_norm": 0.3438659608364105, "learning_rate": 1.6083565715592343e-06, "loss": 0.311, "step": 4925 }, { "epoch": 2.290807626724539, "grad_norm": 0.3378322720527649, "learning_rate": 1.606368857993081e-06, "loss": 0.3261, "step": 4926 }, { "epoch": 2.291272670903736, "grad_norm": 0.3330194652080536, "learning_rate": 1.6043821383663638e-06, "loss": 0.3251, "step": 4927 }, { "epoch": 2.291737715082933, "grad_norm": 0.3478861451148987, "learning_rate": 1.6023964132609642e-06, "loss": 0.3453, "step": 4928 }, { "epoch": 2.29220275926213, "grad_norm": 0.33348485827445984, "learning_rate": 1.600411683258466e-06, "loss": 0.3251, "step": 4929 }, { "epoch": 2.292667803441327, "grad_norm": 0.33961689472198486, "learning_rate": 1.5984279489401655e-06, "loss": 0.3144, "step": 4930 }, { "epoch": 2.293132847620524, "grad_norm": 0.33800020813941956, "learning_rate": 1.596445210887067e-06, "loss": 0.3234, "step": 4931 }, { "epoch": 2.293597891799721, "grad_norm": 0.3581719398498535, "learning_rate": 1.5944634696798827e-06, "loss": 0.3088, "step": 4932 }, { "epoch": 2.2940629359789177, "grad_norm": 0.32927682995796204, "learning_rate": 1.5924827258990305e-06, "loss": 0.2917, "step": 4933 }, { "epoch": 2.294527980158115, "grad_norm": 0.3471710681915283, "learning_rate": 1.5905029801246401e-06, "loss": 0.3399, "step": 4934 }, { "epoch": 2.294993024337312, "grad_norm": 0.36960649490356445, "learning_rate": 1.5885242329365448e-06, "loss": 0.319, "step": 4935 }, { "epoch": 2.295458068516509, "grad_norm": 0.3120764493942261, "learning_rate": 1.5865464849142897e-06, "loss": 0.309, "step": 4936 }, { "epoch": 2.2959231126957063, "grad_norm": 0.32464638352394104, "learning_rate": 1.5845697366371237e-06, "loss": 0.3333, "step": 4937 }, { "epoch": 2.296388156874903, "grad_norm": 0.35078492760658264, "learning_rate": 1.5825939886840036e-06, "loss": 0.3327, "step": 4938 }, { "epoch": 2.2968532010541, "grad_norm": 0.3693057596683502, "learning_rate": 1.5806192416335959e-06, "loss": 0.2898, "step": 4939 }, { "epoch": 2.297318245233297, "grad_norm": 0.39447295665740967, "learning_rate": 1.5786454960642694e-06, "loss": 0.3496, "step": 4940 }, { "epoch": 2.2977832894124943, "grad_norm": 0.30697473883628845, "learning_rate": 1.576672752554103e-06, "loss": 0.2768, "step": 4941 }, { "epoch": 2.2982483335916912, "grad_norm": 0.33453670144081116, "learning_rate": 1.574701011680882e-06, "loss": 0.3336, "step": 4942 }, { "epoch": 2.298713377770888, "grad_norm": 0.3246103525161743, "learning_rate": 1.572730274022099e-06, "loss": 0.318, "step": 4943 }, { "epoch": 2.299178421950085, "grad_norm": 0.38411572575569153, "learning_rate": 1.570760540154947e-06, "loss": 0.3616, "step": 4944 }, { "epoch": 2.2996434661292824, "grad_norm": 0.35833507776260376, "learning_rate": 1.5687918106563326e-06, "loss": 0.323, "step": 4945 }, { "epoch": 2.3001085103084793, "grad_norm": 0.3165144920349121, "learning_rate": 1.566824086102862e-06, "loss": 0.2817, "step": 4946 }, { "epoch": 2.300573554487676, "grad_norm": 0.3616514801979065, "learning_rate": 1.5648573670708527e-06, "loss": 0.3341, "step": 4947 }, { "epoch": 2.3010385986668735, "grad_norm": 0.35235756635665894, "learning_rate": 1.562891654136321e-06, "loss": 0.3283, "step": 4948 }, { "epoch": 2.3015036428460705, "grad_norm": 0.3527206480503082, "learning_rate": 1.560926947874996e-06, "loss": 0.3329, "step": 4949 }, { "epoch": 2.3019686870252674, "grad_norm": 0.35835447907447815, "learning_rate": 1.5589632488623053e-06, "loss": 0.3169, "step": 4950 }, { "epoch": 2.3024337312044643, "grad_norm": 0.33192187547683716, "learning_rate": 1.557000557673387e-06, "loss": 0.2644, "step": 4951 }, { "epoch": 2.3028987753836616, "grad_norm": 0.3572143614292145, "learning_rate": 1.5550388748830786e-06, "loss": 0.3176, "step": 4952 }, { "epoch": 2.3033638195628585, "grad_norm": 0.34204208850860596, "learning_rate": 1.5530782010659267e-06, "loss": 0.3265, "step": 4953 }, { "epoch": 2.3038288637420554, "grad_norm": 0.3122238516807556, "learning_rate": 1.5511185367961813e-06, "loss": 0.2969, "step": 4954 }, { "epoch": 2.3042939079212523, "grad_norm": 0.34026968479156494, "learning_rate": 1.5491598826477967e-06, "loss": 0.3238, "step": 4955 }, { "epoch": 2.3047589521004497, "grad_norm": 0.31696975231170654, "learning_rate": 1.5472022391944285e-06, "loss": 0.2969, "step": 4956 }, { "epoch": 2.3052239962796466, "grad_norm": 0.38175731897354126, "learning_rate": 1.5452456070094419e-06, "loss": 0.3834, "step": 4957 }, { "epoch": 2.3056890404588435, "grad_norm": 0.31309717893600464, "learning_rate": 1.543289986665899e-06, "loss": 0.2931, "step": 4958 }, { "epoch": 2.306154084638041, "grad_norm": 0.3553571403026581, "learning_rate": 1.5413353787365726e-06, "loss": 0.3586, "step": 4959 }, { "epoch": 2.3066191288172377, "grad_norm": 0.3609411418437958, "learning_rate": 1.5393817837939328e-06, "loss": 0.2954, "step": 4960 }, { "epoch": 2.3070841729964346, "grad_norm": 0.3583955466747284, "learning_rate": 1.537429202410159e-06, "loss": 0.3251, "step": 4961 }, { "epoch": 2.3075492171756316, "grad_norm": 0.3274742662906647, "learning_rate": 1.5354776351571266e-06, "loss": 0.2977, "step": 4962 }, { "epoch": 2.308014261354829, "grad_norm": 0.3816562294960022, "learning_rate": 1.533527082606422e-06, "loss": 0.3322, "step": 4963 }, { "epoch": 2.308479305534026, "grad_norm": 0.33534786105155945, "learning_rate": 1.5315775453293269e-06, "loss": 0.3119, "step": 4964 }, { "epoch": 2.3089443497132227, "grad_norm": 0.33446696400642395, "learning_rate": 1.5296290238968303e-06, "loss": 0.3481, "step": 4965 }, { "epoch": 2.3094093938924196, "grad_norm": 0.32276517152786255, "learning_rate": 1.5276815188796235e-06, "loss": 0.3235, "step": 4966 }, { "epoch": 2.309874438071617, "grad_norm": 0.3487582504749298, "learning_rate": 1.5257350308480994e-06, "loss": 0.3368, "step": 4967 }, { "epoch": 2.310339482250814, "grad_norm": 0.32539722323417664, "learning_rate": 1.5237895603723501e-06, "loss": 0.3009, "step": 4968 }, { "epoch": 2.3108045264300108, "grad_norm": 0.3673970699310303, "learning_rate": 1.5218451080221763e-06, "loss": 0.3305, "step": 4969 }, { "epoch": 2.311269570609208, "grad_norm": 0.31702741980552673, "learning_rate": 1.5199016743670719e-06, "loss": 0.2929, "step": 4970 }, { "epoch": 2.311734614788405, "grad_norm": 0.3536950647830963, "learning_rate": 1.517959259976241e-06, "loss": 0.3543, "step": 4971 }, { "epoch": 2.312199658967602, "grad_norm": 0.3390069603919983, "learning_rate": 1.5160178654185836e-06, "loss": 0.3179, "step": 4972 }, { "epoch": 2.312664703146799, "grad_norm": 0.3432963192462921, "learning_rate": 1.5140774912627005e-06, "loss": 0.3357, "step": 4973 }, { "epoch": 2.3131297473259957, "grad_norm": 0.35164210200309753, "learning_rate": 1.5121381380769002e-06, "loss": 0.3311, "step": 4974 }, { "epoch": 2.313594791505193, "grad_norm": 0.33762413263320923, "learning_rate": 1.5101998064291828e-06, "loss": 0.3111, "step": 4975 }, { "epoch": 2.31405983568439, "grad_norm": 0.3147108852863312, "learning_rate": 1.5082624968872578e-06, "loss": 0.2938, "step": 4976 }, { "epoch": 2.314524879863587, "grad_norm": 0.3715774714946747, "learning_rate": 1.50632621001853e-06, "loss": 0.3709, "step": 4977 }, { "epoch": 2.3149899240427843, "grad_norm": 0.32812413573265076, "learning_rate": 1.5043909463901086e-06, "loss": 0.3058, "step": 4978 }, { "epoch": 2.315454968221981, "grad_norm": 0.3472438156604767, "learning_rate": 1.5024567065687977e-06, "loss": 0.3163, "step": 4979 }, { "epoch": 2.315920012401178, "grad_norm": 0.3634364604949951, "learning_rate": 1.500523491121108e-06, "loss": 0.3306, "step": 4980 }, { "epoch": 2.316385056580375, "grad_norm": 0.3792670667171478, "learning_rate": 1.4985913006132435e-06, "loss": 0.336, "step": 4981 }, { "epoch": 2.3168501007595723, "grad_norm": 0.32765889167785645, "learning_rate": 1.496660135611115e-06, "loss": 0.3053, "step": 4982 }, { "epoch": 2.3173151449387692, "grad_norm": 0.35471829771995544, "learning_rate": 1.4947299966803259e-06, "loss": 0.3403, "step": 4983 }, { "epoch": 2.317780189117966, "grad_norm": 0.3708203136920929, "learning_rate": 1.4928008843861851e-06, "loss": 0.2946, "step": 4984 }, { "epoch": 2.318245233297163, "grad_norm": 0.3640722930431366, "learning_rate": 1.490872799293696e-06, "loss": 0.3321, "step": 4985 }, { "epoch": 2.3187102774763604, "grad_norm": 0.33545586466789246, "learning_rate": 1.4889457419675669e-06, "loss": 0.2907, "step": 4986 }, { "epoch": 2.3191753216555573, "grad_norm": 0.3201104402542114, "learning_rate": 1.487019712972197e-06, "loss": 0.3066, "step": 4987 }, { "epoch": 2.319640365834754, "grad_norm": 0.32187438011169434, "learning_rate": 1.4850947128716914e-06, "loss": 0.3197, "step": 4988 }, { "epoch": 2.3201054100139515, "grad_norm": 0.3507417142391205, "learning_rate": 1.4831707422298513e-06, "loss": 0.323, "step": 4989 }, { "epoch": 2.3205704541931484, "grad_norm": 0.3247746229171753, "learning_rate": 1.4812478016101784e-06, "loss": 0.3163, "step": 4990 }, { "epoch": 2.3210354983723454, "grad_norm": 0.34104475378990173, "learning_rate": 1.4793258915758668e-06, "loss": 0.3392, "step": 4991 }, { "epoch": 2.3215005425515423, "grad_norm": 0.32340723276138306, "learning_rate": 1.4774050126898164e-06, "loss": 0.3241, "step": 4992 }, { "epoch": 2.3219655867307396, "grad_norm": 0.348816454410553, "learning_rate": 1.475485165514618e-06, "loss": 0.3026, "step": 4993 }, { "epoch": 2.3224306309099365, "grad_norm": 0.372328519821167, "learning_rate": 1.473566350612567e-06, "loss": 0.3019, "step": 4994 }, { "epoch": 2.3228956750891334, "grad_norm": 0.3437679409980774, "learning_rate": 1.47164856854565e-06, "loss": 0.3165, "step": 4995 }, { "epoch": 2.3233607192683303, "grad_norm": 0.36129605770111084, "learning_rate": 1.4697318198755572e-06, "loss": 0.3472, "step": 4996 }, { "epoch": 2.3238257634475277, "grad_norm": 0.32315579056739807, "learning_rate": 1.4678161051636703e-06, "loss": 0.3008, "step": 4997 }, { "epoch": 2.3242908076267246, "grad_norm": 0.35113513469696045, "learning_rate": 1.4659014249710734e-06, "loss": 0.3222, "step": 4998 }, { "epoch": 2.3247558518059215, "grad_norm": 0.3178529143333435, "learning_rate": 1.4639877798585434e-06, "loss": 0.3028, "step": 4999 }, { "epoch": 2.325220895985119, "grad_norm": 0.31862160563468933, "learning_rate": 1.462075170386556e-06, "loss": 0.306, "step": 5000 }, { "epoch": 2.3256859401643157, "grad_norm": 0.3178994953632355, "learning_rate": 1.4601635971152844e-06, "loss": 0.3442, "step": 5001 }, { "epoch": 2.3261509843435126, "grad_norm": 0.33765512704849243, "learning_rate": 1.4582530606045986e-06, "loss": 0.3313, "step": 5002 }, { "epoch": 2.3266160285227095, "grad_norm": 0.3193725347518921, "learning_rate": 1.456343561414061e-06, "loss": 0.2883, "step": 5003 }, { "epoch": 2.3270810727019065, "grad_norm": 0.3627553880214691, "learning_rate": 1.4544351001029349e-06, "loss": 0.3271, "step": 5004 }, { "epoch": 2.327546116881104, "grad_norm": 0.3708834648132324, "learning_rate": 1.4525276772301761e-06, "loss": 0.3387, "step": 5005 }, { "epoch": 2.3280111610603007, "grad_norm": 0.33413180708885193, "learning_rate": 1.4506212933544394e-06, "loss": 0.3144, "step": 5006 }, { "epoch": 2.3284762052394976, "grad_norm": 0.3867631256580353, "learning_rate": 1.4487159490340714e-06, "loss": 0.3447, "step": 5007 }, { "epoch": 2.328941249418695, "grad_norm": 0.3161846697330475, "learning_rate": 1.4468116448271196e-06, "loss": 0.3167, "step": 5008 }, { "epoch": 2.329406293597892, "grad_norm": 0.3061203956604004, "learning_rate": 1.4449083812913217e-06, "loss": 0.3078, "step": 5009 }, { "epoch": 2.3298713377770888, "grad_norm": 0.33758896589279175, "learning_rate": 1.4430061589841122e-06, "loss": 0.3081, "step": 5010 }, { "epoch": 2.3303363819562857, "grad_norm": 0.37586334347724915, "learning_rate": 1.4411049784626213e-06, "loss": 0.324, "step": 5011 }, { "epoch": 2.330801426135483, "grad_norm": 0.36526045203208923, "learning_rate": 1.4392048402836744e-06, "loss": 0.3264, "step": 5012 }, { "epoch": 2.33126647031468, "grad_norm": 0.3223170042037964, "learning_rate": 1.437305745003793e-06, "loss": 0.2935, "step": 5013 }, { "epoch": 2.331731514493877, "grad_norm": 0.3323318660259247, "learning_rate": 1.4354076931791876e-06, "loss": 0.3261, "step": 5014 }, { "epoch": 2.3321965586730737, "grad_norm": 0.3371385931968689, "learning_rate": 1.433510685365771e-06, "loss": 0.3157, "step": 5015 }, { "epoch": 2.332661602852271, "grad_norm": 0.31770241260528564, "learning_rate": 1.4316147221191411e-06, "loss": 0.3195, "step": 5016 }, { "epoch": 2.333126647031468, "grad_norm": 0.35454097390174866, "learning_rate": 1.4297198039945998e-06, "loss": 0.3435, "step": 5017 }, { "epoch": 2.333591691210665, "grad_norm": 0.34481361508369446, "learning_rate": 1.4278259315471332e-06, "loss": 0.312, "step": 5018 }, { "epoch": 2.3340567353898622, "grad_norm": 0.37690550088882446, "learning_rate": 1.425933105331429e-06, "loss": 0.3404, "step": 5019 }, { "epoch": 2.334521779569059, "grad_norm": 0.32657355070114136, "learning_rate": 1.4240413259018632e-06, "loss": 0.3181, "step": 5020 }, { "epoch": 2.334986823748256, "grad_norm": 0.33882707357406616, "learning_rate": 1.4221505938125097e-06, "loss": 0.3137, "step": 5021 }, { "epoch": 2.335451867927453, "grad_norm": 0.35311123728752136, "learning_rate": 1.42026090961713e-06, "loss": 0.3127, "step": 5022 }, { "epoch": 2.3359169121066503, "grad_norm": 0.3354308307170868, "learning_rate": 1.4183722738691834e-06, "loss": 0.3445, "step": 5023 }, { "epoch": 2.336381956285847, "grad_norm": 0.3541325330734253, "learning_rate": 1.4164846871218213e-06, "loss": 0.3136, "step": 5024 }, { "epoch": 2.336847000465044, "grad_norm": 0.32369163632392883, "learning_rate": 1.4145981499278877e-06, "loss": 0.3183, "step": 5025 }, { "epoch": 2.337312044644241, "grad_norm": 0.33266210556030273, "learning_rate": 1.4127126628399168e-06, "loss": 0.3208, "step": 5026 }, { "epoch": 2.3377770888234384, "grad_norm": 0.3531791567802429, "learning_rate": 1.410828226410139e-06, "loss": 0.3399, "step": 5027 }, { "epoch": 2.3382421330026353, "grad_norm": 0.3102116882801056, "learning_rate": 1.4089448411904733e-06, "loss": 0.3203, "step": 5028 }, { "epoch": 2.338707177181832, "grad_norm": 0.39151903986930847, "learning_rate": 1.4070625077325345e-06, "loss": 0.3452, "step": 5029 }, { "epoch": 2.3391722213610295, "grad_norm": 0.3667720854282379, "learning_rate": 1.4051812265876257e-06, "loss": 0.2859, "step": 5030 }, { "epoch": 2.3396372655402264, "grad_norm": 0.30760088562965393, "learning_rate": 1.4033009983067454e-06, "loss": 0.2963, "step": 5031 }, { "epoch": 2.3401023097194233, "grad_norm": 0.3295292854309082, "learning_rate": 1.4014218234405796e-06, "loss": 0.3199, "step": 5032 }, { "epoch": 2.3405673538986203, "grad_norm": 0.321175754070282, "learning_rate": 1.3995437025395109e-06, "loss": 0.3068, "step": 5033 }, { "epoch": 2.341032398077817, "grad_norm": 0.35323259234428406, "learning_rate": 1.3976666361536074e-06, "loss": 0.2936, "step": 5034 }, { "epoch": 2.3414974422570145, "grad_norm": 0.3676642179489136, "learning_rate": 1.395790624832633e-06, "loss": 0.3327, "step": 5035 }, { "epoch": 2.3419624864362114, "grad_norm": 0.31863468885421753, "learning_rate": 1.3939156691260407e-06, "loss": 0.2977, "step": 5036 }, { "epoch": 2.3424275306154083, "grad_norm": 0.3256819248199463, "learning_rate": 1.392041769582977e-06, "loss": 0.3326, "step": 5037 }, { "epoch": 2.3428925747946057, "grad_norm": 0.3244876265525818, "learning_rate": 1.3901689267522718e-06, "loss": 0.3405, "step": 5038 }, { "epoch": 2.3433576189738026, "grad_norm": 0.31455737352371216, "learning_rate": 1.3882971411824547e-06, "loss": 0.3042, "step": 5039 }, { "epoch": 2.3438226631529995, "grad_norm": 0.32401779294013977, "learning_rate": 1.386426413421738e-06, "loss": 0.3078, "step": 5040 }, { "epoch": 2.3442877073321964, "grad_norm": 0.33598318696022034, "learning_rate": 1.3845567440180308e-06, "loss": 0.297, "step": 5041 }, { "epoch": 2.3447527515113937, "grad_norm": 0.35334211587905884, "learning_rate": 1.3826881335189258e-06, "loss": 0.3692, "step": 5042 }, { "epoch": 2.3452177956905906, "grad_norm": 0.35467445850372314, "learning_rate": 1.3808205824717108e-06, "loss": 0.3316, "step": 5043 }, { "epoch": 2.3456828398697875, "grad_norm": 0.34783780574798584, "learning_rate": 1.3789540914233607e-06, "loss": 0.316, "step": 5044 }, { "epoch": 2.3461478840489844, "grad_norm": 0.36762094497680664, "learning_rate": 1.3770886609205381e-06, "loss": 0.3563, "step": 5045 }, { "epoch": 2.346612928228182, "grad_norm": 0.35472989082336426, "learning_rate": 1.3752242915095993e-06, "loss": 0.337, "step": 5046 }, { "epoch": 2.3470779724073787, "grad_norm": 0.33739933371543884, "learning_rate": 1.373360983736588e-06, "loss": 0.2881, "step": 5047 }, { "epoch": 2.3475430165865756, "grad_norm": 0.374533474445343, "learning_rate": 1.3714987381472378e-06, "loss": 0.3296, "step": 5048 }, { "epoch": 2.348008060765773, "grad_norm": 0.32988038659095764, "learning_rate": 1.3696375552869673e-06, "loss": 0.316, "step": 5049 }, { "epoch": 2.34847310494497, "grad_norm": 0.33644863963127136, "learning_rate": 1.36777743570089e-06, "loss": 0.3117, "step": 5050 }, { "epoch": 2.3489381491241668, "grad_norm": 0.3263695538043976, "learning_rate": 1.365918379933801e-06, "loss": 0.299, "step": 5051 }, { "epoch": 2.3494031933033637, "grad_norm": 0.3548851013183594, "learning_rate": 1.3640603885301917e-06, "loss": 0.3732, "step": 5052 }, { "epoch": 2.349868237482561, "grad_norm": 0.32212895154953003, "learning_rate": 1.362203462034234e-06, "loss": 0.3201, "step": 5053 }, { "epoch": 2.350333281661758, "grad_norm": 0.3988026976585388, "learning_rate": 1.3603476009897942e-06, "loss": 0.3429, "step": 5054 }, { "epoch": 2.350798325840955, "grad_norm": 0.3584456145763397, "learning_rate": 1.3584928059404207e-06, "loss": 0.3324, "step": 5055 }, { "epoch": 2.3512633700201517, "grad_norm": 0.315178781747818, "learning_rate": 1.356639077429357e-06, "loss": 0.3269, "step": 5056 }, { "epoch": 2.351728414199349, "grad_norm": 0.3512877821922302, "learning_rate": 1.354786415999526e-06, "loss": 0.3097, "step": 5057 }, { "epoch": 2.352193458378546, "grad_norm": 0.35129326581954956, "learning_rate": 1.352934822193544e-06, "loss": 0.3193, "step": 5058 }, { "epoch": 2.352658502557743, "grad_norm": 0.3396090269088745, "learning_rate": 1.351084296553713e-06, "loss": 0.2999, "step": 5059 }, { "epoch": 2.3531235467369402, "grad_norm": 0.3327389359474182, "learning_rate": 1.3492348396220229e-06, "loss": 0.3217, "step": 5060 }, { "epoch": 2.353588590916137, "grad_norm": 0.31320449709892273, "learning_rate": 1.3473864519401463e-06, "loss": 0.2914, "step": 5061 }, { "epoch": 2.354053635095334, "grad_norm": 0.343872994184494, "learning_rate": 1.34553913404945e-06, "loss": 0.3318, "step": 5062 }, { "epoch": 2.354518679274531, "grad_norm": 0.31648504734039307, "learning_rate": 1.3436928864909799e-06, "loss": 0.2752, "step": 5063 }, { "epoch": 2.354983723453728, "grad_norm": 0.38725370168685913, "learning_rate": 1.341847709805475e-06, "loss": 0.3429, "step": 5064 }, { "epoch": 2.355448767632925, "grad_norm": 0.3474979102611542, "learning_rate": 1.3400036045333542e-06, "loss": 0.3161, "step": 5065 }, { "epoch": 2.355913811812122, "grad_norm": 0.3323841392993927, "learning_rate": 1.3381605712147294e-06, "loss": 0.2989, "step": 5066 }, { "epoch": 2.356378855991319, "grad_norm": 0.3497825562953949, "learning_rate": 1.3363186103893916e-06, "loss": 0.356, "step": 5067 }, { "epoch": 2.3568439001705164, "grad_norm": 0.35843122005462646, "learning_rate": 1.3344777225968247e-06, "loss": 0.3005, "step": 5068 }, { "epoch": 2.3573089443497133, "grad_norm": 0.3550424873828888, "learning_rate": 1.332637908376192e-06, "loss": 0.3315, "step": 5069 }, { "epoch": 2.35777398852891, "grad_norm": 0.32174983620643616, "learning_rate": 1.3307991682663463e-06, "loss": 0.2728, "step": 5070 }, { "epoch": 2.358239032708107, "grad_norm": 0.37295761704444885, "learning_rate": 1.328961502805825e-06, "loss": 0.3422, "step": 5071 }, { "epoch": 2.3587040768873044, "grad_norm": 0.34429284930229187, "learning_rate": 1.3271249125328512e-06, "loss": 0.3188, "step": 5072 }, { "epoch": 2.3591691210665013, "grad_norm": 0.32858747243881226, "learning_rate": 1.3252893979853304e-06, "loss": 0.2941, "step": 5073 }, { "epoch": 2.3596341652456982, "grad_norm": 0.3379555344581604, "learning_rate": 1.3234549597008572e-06, "loss": 0.3626, "step": 5074 }, { "epoch": 2.360099209424895, "grad_norm": 0.33183690905570984, "learning_rate": 1.3216215982167064e-06, "loss": 0.3313, "step": 5075 }, { "epoch": 2.3605642536040925, "grad_norm": 0.386362761259079, "learning_rate": 1.3197893140698426e-06, "loss": 0.3743, "step": 5076 }, { "epoch": 2.3610292977832894, "grad_norm": 0.3396303653717041, "learning_rate": 1.3179581077969084e-06, "loss": 0.2914, "step": 5077 }, { "epoch": 2.3614943419624863, "grad_norm": 0.3387748599052429, "learning_rate": 1.3161279799342385e-06, "loss": 0.3185, "step": 5078 }, { "epoch": 2.3619593861416837, "grad_norm": 0.32866546511650085, "learning_rate": 1.314298931017844e-06, "loss": 0.2987, "step": 5079 }, { "epoch": 2.3624244303208806, "grad_norm": 0.3564012348651886, "learning_rate": 1.3124709615834263e-06, "loss": 0.3356, "step": 5080 }, { "epoch": 2.3628894745000775, "grad_norm": 0.34756961464881897, "learning_rate": 1.3106440721663655e-06, "loss": 0.358, "step": 5081 }, { "epoch": 2.3633545186792744, "grad_norm": 0.3577297627925873, "learning_rate": 1.3088182633017294e-06, "loss": 0.3194, "step": 5082 }, { "epoch": 2.3638195628584717, "grad_norm": 0.3133348822593689, "learning_rate": 1.306993535524269e-06, "loss": 0.3099, "step": 5083 }, { "epoch": 2.3642846070376686, "grad_norm": 0.3245254456996918, "learning_rate": 1.3051698893684144e-06, "loss": 0.3172, "step": 5084 }, { "epoch": 2.3647496512168655, "grad_norm": 0.3281231224536896, "learning_rate": 1.303347325368285e-06, "loss": 0.3242, "step": 5085 }, { "epoch": 2.3652146953960624, "grad_norm": 0.3328626751899719, "learning_rate": 1.3015258440576767e-06, "loss": 0.3048, "step": 5086 }, { "epoch": 2.36567973957526, "grad_norm": 0.34870782494544983, "learning_rate": 1.299705445970076e-06, "loss": 0.2954, "step": 5087 }, { "epoch": 2.3661447837544567, "grad_norm": 0.31751129031181335, "learning_rate": 1.2978861316386437e-06, "loss": 0.3195, "step": 5088 }, { "epoch": 2.3666098279336536, "grad_norm": 0.3291054368019104, "learning_rate": 1.2960679015962313e-06, "loss": 0.3173, "step": 5089 }, { "epoch": 2.367074872112851, "grad_norm": 0.34506654739379883, "learning_rate": 1.2942507563753653e-06, "loss": 0.3536, "step": 5090 }, { "epoch": 2.367539916292048, "grad_norm": 0.3213541507720947, "learning_rate": 1.2924346965082612e-06, "loss": 0.3214, "step": 5091 }, { "epoch": 2.3680049604712448, "grad_norm": 0.3615787625312805, "learning_rate": 1.2906197225268108e-06, "loss": 0.3476, "step": 5092 }, { "epoch": 2.3684700046504417, "grad_norm": 0.32223793864250183, "learning_rate": 1.288805834962591e-06, "loss": 0.2795, "step": 5093 }, { "epoch": 2.3689350488296386, "grad_norm": 0.357710599899292, "learning_rate": 1.2869930343468611e-06, "loss": 0.3424, "step": 5094 }, { "epoch": 2.369400093008836, "grad_norm": 0.3196834921836853, "learning_rate": 1.285181321210562e-06, "loss": 0.2891, "step": 5095 }, { "epoch": 2.369865137188033, "grad_norm": 0.33670881390571594, "learning_rate": 1.2833706960843118e-06, "loss": 0.3391, "step": 5096 }, { "epoch": 2.3703301813672297, "grad_norm": 0.30696120858192444, "learning_rate": 1.2815611594984162e-06, "loss": 0.275, "step": 5097 }, { "epoch": 2.370795225546427, "grad_norm": 0.35666152834892273, "learning_rate": 1.2797527119828567e-06, "loss": 0.342, "step": 5098 }, { "epoch": 2.371260269725624, "grad_norm": 0.31616926193237305, "learning_rate": 1.2779453540673009e-06, "loss": 0.3172, "step": 5099 }, { "epoch": 2.371725313904821, "grad_norm": 0.3312261700630188, "learning_rate": 1.2761390862810907e-06, "loss": 0.3278, "step": 5100 }, { "epoch": 2.372190358084018, "grad_norm": 0.33016443252563477, "learning_rate": 1.274333909153257e-06, "loss": 0.3146, "step": 5101 }, { "epoch": 2.372655402263215, "grad_norm": 0.34783631563186646, "learning_rate": 1.2725298232125034e-06, "loss": 0.3116, "step": 5102 }, { "epoch": 2.373120446442412, "grad_norm": 0.34138286113739014, "learning_rate": 1.27072682898722e-06, "loss": 0.3093, "step": 5103 }, { "epoch": 2.373585490621609, "grad_norm": 0.34735816717147827, "learning_rate": 1.2689249270054716e-06, "loss": 0.3073, "step": 5104 }, { "epoch": 2.374050534800806, "grad_norm": 0.3784792721271515, "learning_rate": 1.2671241177950078e-06, "loss": 0.3441, "step": 5105 }, { "epoch": 2.374515578980003, "grad_norm": 0.3438231945037842, "learning_rate": 1.2653244018832562e-06, "loss": 0.2984, "step": 5106 }, { "epoch": 2.3749806231592, "grad_norm": 0.3264318108558655, "learning_rate": 1.2635257797973255e-06, "loss": 0.3278, "step": 5107 }, { "epoch": 2.375445667338397, "grad_norm": 0.3333817422389984, "learning_rate": 1.2617282520640007e-06, "loss": 0.2979, "step": 5108 }, { "epoch": 2.3759107115175944, "grad_norm": 0.37426552176475525, "learning_rate": 1.2599318192097509e-06, "loss": 0.3836, "step": 5109 }, { "epoch": 2.3763757556967913, "grad_norm": 0.3461456596851349, "learning_rate": 1.2581364817607194e-06, "loss": 0.3214, "step": 5110 }, { "epoch": 2.376840799875988, "grad_norm": 0.35083499550819397, "learning_rate": 1.2563422402427339e-06, "loss": 0.3083, "step": 5111 }, { "epoch": 2.377305844055185, "grad_norm": 0.3530008792877197, "learning_rate": 1.254549095181296e-06, "loss": 0.2944, "step": 5112 }, { "epoch": 2.3777708882343824, "grad_norm": 0.3381586968898773, "learning_rate": 1.2527570471015915e-06, "loss": 0.3435, "step": 5113 }, { "epoch": 2.3782359324135793, "grad_norm": 0.3386650085449219, "learning_rate": 1.2509660965284797e-06, "loss": 0.3303, "step": 5114 }, { "epoch": 2.3787009765927762, "grad_norm": 0.333477646112442, "learning_rate": 1.2491762439865034e-06, "loss": 0.3108, "step": 5115 }, { "epoch": 2.379166020771973, "grad_norm": 0.32221055030822754, "learning_rate": 1.247387489999879e-06, "loss": 0.307, "step": 5116 }, { "epoch": 2.3796310649511705, "grad_norm": 0.35751134157180786, "learning_rate": 1.2455998350925042e-06, "loss": 0.3388, "step": 5117 }, { "epoch": 2.3800961091303674, "grad_norm": 0.3639261722564697, "learning_rate": 1.2438132797879554e-06, "loss": 0.348, "step": 5118 }, { "epoch": 2.3805611533095643, "grad_norm": 0.31775328516960144, "learning_rate": 1.2420278246094835e-06, "loss": 0.2986, "step": 5119 }, { "epoch": 2.3810261974887617, "grad_norm": 0.36067667603492737, "learning_rate": 1.240243470080022e-06, "loss": 0.3553, "step": 5120 }, { "epoch": 2.3814912416679586, "grad_norm": 0.31480494141578674, "learning_rate": 1.2384602167221765e-06, "loss": 0.2935, "step": 5121 }, { "epoch": 2.3819562858471555, "grad_norm": 0.30289679765701294, "learning_rate": 1.2366780650582355e-06, "loss": 0.2969, "step": 5122 }, { "epoch": 2.3824213300263524, "grad_norm": 0.3624131977558136, "learning_rate": 1.2348970156101592e-06, "loss": 0.3583, "step": 5123 }, { "epoch": 2.3828863742055497, "grad_norm": 0.3010319769382477, "learning_rate": 1.233117068899592e-06, "loss": 0.2917, "step": 5124 }, { "epoch": 2.3833514183847466, "grad_norm": 0.3690085709095001, "learning_rate": 1.2313382254478473e-06, "loss": 0.3563, "step": 5125 }, { "epoch": 2.3838164625639435, "grad_norm": 0.33377185463905334, "learning_rate": 1.229560485775923e-06, "loss": 0.3258, "step": 5126 }, { "epoch": 2.3842815067431404, "grad_norm": 0.31576281785964966, "learning_rate": 1.227783850404487e-06, "loss": 0.3138, "step": 5127 }, { "epoch": 2.384746550922338, "grad_norm": 0.3261266350746155, "learning_rate": 1.2260083198538886e-06, "loss": 0.3253, "step": 5128 }, { "epoch": 2.3852115951015347, "grad_norm": 0.34286534786224365, "learning_rate": 1.2242338946441518e-06, "loss": 0.357, "step": 5129 }, { "epoch": 2.3856766392807316, "grad_norm": 0.3160213530063629, "learning_rate": 1.2224605752949786e-06, "loss": 0.3051, "step": 5130 }, { "epoch": 2.386141683459929, "grad_norm": 0.34241053462028503, "learning_rate": 1.2206883623257421e-06, "loss": 0.3066, "step": 5131 }, { "epoch": 2.386606727639126, "grad_norm": 0.34416183829307556, "learning_rate": 1.2189172562554973e-06, "loss": 0.3326, "step": 5132 }, { "epoch": 2.3870717718183228, "grad_norm": 0.3707297742366791, "learning_rate": 1.2171472576029707e-06, "loss": 0.3504, "step": 5133 }, { "epoch": 2.3875368159975197, "grad_norm": 0.31945255398750305, "learning_rate": 1.2153783668865681e-06, "loss": 0.3035, "step": 5134 }, { "epoch": 2.3880018601767166, "grad_norm": 0.33111390471458435, "learning_rate": 1.2136105846243662e-06, "loss": 0.3256, "step": 5135 }, { "epoch": 2.388466904355914, "grad_norm": 0.3572760224342346, "learning_rate": 1.2118439113341224e-06, "loss": 0.3382, "step": 5136 }, { "epoch": 2.388931948535111, "grad_norm": 0.32829439640045166, "learning_rate": 1.210078347533264e-06, "loss": 0.3172, "step": 5137 }, { "epoch": 2.3893969927143077, "grad_norm": 0.3324892818927765, "learning_rate": 1.2083138937388989e-06, "loss": 0.3209, "step": 5138 }, { "epoch": 2.389862036893505, "grad_norm": 0.31397631764411926, "learning_rate": 1.2065505504678038e-06, "loss": 0.3149, "step": 5139 }, { "epoch": 2.390327081072702, "grad_norm": 0.3297927677631378, "learning_rate": 1.2047883182364351e-06, "loss": 0.2948, "step": 5140 }, { "epoch": 2.390792125251899, "grad_norm": 0.33224743604660034, "learning_rate": 1.2030271975609214e-06, "loss": 0.3089, "step": 5141 }, { "epoch": 2.391257169431096, "grad_norm": 0.37292471528053284, "learning_rate": 1.2012671889570683e-06, "loss": 0.3241, "step": 5142 }, { "epoch": 2.391722213610293, "grad_norm": 0.3111366927623749, "learning_rate": 1.1995082929403507e-06, "loss": 0.2858, "step": 5143 }, { "epoch": 2.39218725778949, "grad_norm": 0.37733349204063416, "learning_rate": 1.1977505100259235e-06, "loss": 0.3593, "step": 5144 }, { "epoch": 2.392652301968687, "grad_norm": 0.36650681495666504, "learning_rate": 1.1959938407286099e-06, "loss": 0.3479, "step": 5145 }, { "epoch": 2.393117346147884, "grad_norm": 0.31650200486183167, "learning_rate": 1.1942382855629131e-06, "loss": 0.2705, "step": 5146 }, { "epoch": 2.393582390327081, "grad_norm": 0.33961057662963867, "learning_rate": 1.1924838450430032e-06, "loss": 0.3244, "step": 5147 }, { "epoch": 2.394047434506278, "grad_norm": 0.31274569034576416, "learning_rate": 1.19073051968273e-06, "loss": 0.2908, "step": 5148 }, { "epoch": 2.394512478685475, "grad_norm": 0.32725125551223755, "learning_rate": 1.188978309995612e-06, "loss": 0.311, "step": 5149 }, { "epoch": 2.3949775228646724, "grad_norm": 0.3675840198993683, "learning_rate": 1.1872272164948456e-06, "loss": 0.356, "step": 5150 }, { "epoch": 2.3954425670438693, "grad_norm": 0.3493657410144806, "learning_rate": 1.1854772396932946e-06, "loss": 0.3292, "step": 5151 }, { "epoch": 2.395907611223066, "grad_norm": 0.344338595867157, "learning_rate": 1.1837283801034998e-06, "loss": 0.3117, "step": 5152 }, { "epoch": 2.396372655402263, "grad_norm": 0.344719260931015, "learning_rate": 1.181980638237676e-06, "loss": 0.3277, "step": 5153 }, { "epoch": 2.3968376995814604, "grad_norm": 0.32095858454704285, "learning_rate": 1.1802340146077045e-06, "loss": 0.2881, "step": 5154 }, { "epoch": 2.3973027437606573, "grad_norm": 0.3079981803894043, "learning_rate": 1.1784885097251474e-06, "loss": 0.3379, "step": 5155 }, { "epoch": 2.3977677879398542, "grad_norm": 0.33286038041114807, "learning_rate": 1.1767441241012307e-06, "loss": 0.3374, "step": 5156 }, { "epoch": 2.398232832119051, "grad_norm": 0.36355435848236084, "learning_rate": 1.1750008582468592e-06, "loss": 0.3364, "step": 5157 }, { "epoch": 2.3986978762982485, "grad_norm": 0.3209869861602783, "learning_rate": 1.1732587126726054e-06, "loss": 0.3159, "step": 5158 }, { "epoch": 2.3991629204774454, "grad_norm": 0.32692816853523254, "learning_rate": 1.1715176878887174e-06, "loss": 0.3143, "step": 5159 }, { "epoch": 2.3996279646566423, "grad_norm": 0.345546692609787, "learning_rate": 1.1697777844051105e-06, "loss": 0.3354, "step": 5160 }, { "epoch": 2.4000930088358396, "grad_norm": 0.3152925670146942, "learning_rate": 1.168039002731377e-06, "loss": 0.3035, "step": 5161 }, { "epoch": 2.4005580530150366, "grad_norm": 0.379638671875, "learning_rate": 1.1663013433767756e-06, "loss": 0.3057, "step": 5162 }, { "epoch": 2.4010230971942335, "grad_norm": 0.31644484400749207, "learning_rate": 1.164564806850239e-06, "loss": 0.3047, "step": 5163 }, { "epoch": 2.4014881413734304, "grad_norm": 0.34637710452079773, "learning_rate": 1.1628293936603707e-06, "loss": 0.3396, "step": 5164 }, { "epoch": 2.4019531855526273, "grad_norm": 0.3282802700996399, "learning_rate": 1.1610951043154472e-06, "loss": 0.337, "step": 5165 }, { "epoch": 2.4024182297318246, "grad_norm": 0.30934378504753113, "learning_rate": 1.1593619393234096e-06, "loss": 0.3007, "step": 5166 }, { "epoch": 2.4028832739110215, "grad_norm": 0.33991992473602295, "learning_rate": 1.1576298991918778e-06, "loss": 0.3462, "step": 5167 }, { "epoch": 2.4033483180902184, "grad_norm": 0.30924656987190247, "learning_rate": 1.1558989844281349e-06, "loss": 0.3036, "step": 5168 }, { "epoch": 2.4038133622694158, "grad_norm": 0.341217577457428, "learning_rate": 1.1541691955391403e-06, "loss": 0.3183, "step": 5169 }, { "epoch": 2.4042784064486127, "grad_norm": 0.30020660161972046, "learning_rate": 1.1524405330315187e-06, "loss": 0.3064, "step": 5170 }, { "epoch": 2.4047434506278096, "grad_norm": 0.3430638611316681, "learning_rate": 1.15071299741157e-06, "loss": 0.3619, "step": 5171 }, { "epoch": 2.4052084948070065, "grad_norm": 0.33357352018356323, "learning_rate": 1.148986589185258e-06, "loss": 0.3348, "step": 5172 }, { "epoch": 2.405673538986204, "grad_norm": 0.34539151191711426, "learning_rate": 1.147261308858223e-06, "loss": 0.3226, "step": 5173 }, { "epoch": 2.4061385831654007, "grad_norm": 0.34261536598205566, "learning_rate": 1.145537156935768e-06, "loss": 0.3085, "step": 5174 }, { "epoch": 2.4066036273445977, "grad_norm": 0.3475549817085266, "learning_rate": 1.143814133922872e-06, "loss": 0.326, "step": 5175 }, { "epoch": 2.4070686715237946, "grad_norm": 0.3175617754459381, "learning_rate": 1.142092240324179e-06, "loss": 0.3038, "step": 5176 }, { "epoch": 2.407533715702992, "grad_norm": 0.33959630131721497, "learning_rate": 1.1403714766440061e-06, "loss": 0.3148, "step": 5177 }, { "epoch": 2.407998759882189, "grad_norm": 0.34172576665878296, "learning_rate": 1.1386518433863331e-06, "loss": 0.3174, "step": 5178 }, { "epoch": 2.4084638040613857, "grad_norm": 0.30820226669311523, "learning_rate": 1.1369333410548166e-06, "loss": 0.2975, "step": 5179 }, { "epoch": 2.408928848240583, "grad_norm": 0.3713323473930359, "learning_rate": 1.1352159701527743e-06, "loss": 0.3735, "step": 5180 }, { "epoch": 2.40939389241978, "grad_norm": 0.3337565064430237, "learning_rate": 1.1334997311832003e-06, "loss": 0.2932, "step": 5181 }, { "epoch": 2.409858936598977, "grad_norm": 0.35603073239326477, "learning_rate": 1.1317846246487485e-06, "loss": 0.2922, "step": 5182 }, { "epoch": 2.410323980778174, "grad_norm": 0.34648483991622925, "learning_rate": 1.13007065105175e-06, "loss": 0.3224, "step": 5183 }, { "epoch": 2.410789024957371, "grad_norm": 0.3309410810470581, "learning_rate": 1.128357810894196e-06, "loss": 0.3081, "step": 5184 }, { "epoch": 2.411254069136568, "grad_norm": 0.3483981788158417, "learning_rate": 1.1266461046777537e-06, "loss": 0.3221, "step": 5185 }, { "epoch": 2.411719113315765, "grad_norm": 0.3361232280731201, "learning_rate": 1.1249355329037498e-06, "loss": 0.3369, "step": 5186 }, { "epoch": 2.412184157494962, "grad_norm": 0.30131569504737854, "learning_rate": 1.1232260960731855e-06, "loss": 0.2907, "step": 5187 }, { "epoch": 2.412649201674159, "grad_norm": 0.3845626711845398, "learning_rate": 1.1215177946867262e-06, "loss": 0.3715, "step": 5188 }, { "epoch": 2.413114245853356, "grad_norm": 0.3543691337108612, "learning_rate": 1.1198106292447076e-06, "loss": 0.3494, "step": 5189 }, { "epoch": 2.413579290032553, "grad_norm": 0.4014170467853546, "learning_rate": 1.1181046002471292e-06, "loss": 0.3547, "step": 5190 }, { "epoch": 2.4140443342117504, "grad_norm": 0.3230542540550232, "learning_rate": 1.1163997081936578e-06, "loss": 0.2861, "step": 5191 }, { "epoch": 2.4145093783909473, "grad_norm": 0.31293249130249023, "learning_rate": 1.1146959535836317e-06, "loss": 0.2862, "step": 5192 }, { "epoch": 2.414974422570144, "grad_norm": 0.3535180389881134, "learning_rate": 1.112993336916049e-06, "loss": 0.3462, "step": 5193 }, { "epoch": 2.415439466749341, "grad_norm": 0.32987791299819946, "learning_rate": 1.1112918586895826e-06, "loss": 0.3061, "step": 5194 }, { "epoch": 2.415904510928538, "grad_norm": 0.3237574100494385, "learning_rate": 1.1095915194025642e-06, "loss": 0.2999, "step": 5195 }, { "epoch": 2.4163695551077353, "grad_norm": 0.35278692841529846, "learning_rate": 1.1078923195529973e-06, "loss": 0.3329, "step": 5196 }, { "epoch": 2.4168345992869322, "grad_norm": 0.34075209498405457, "learning_rate": 1.1061942596385516e-06, "loss": 0.3127, "step": 5197 }, { "epoch": 2.417299643466129, "grad_norm": 0.3517972230911255, "learning_rate": 1.1044973401565578e-06, "loss": 0.3275, "step": 5198 }, { "epoch": 2.4177646876453265, "grad_norm": 0.4040515720844269, "learning_rate": 1.1028015616040182e-06, "loss": 0.3465, "step": 5199 }, { "epoch": 2.4182297318245234, "grad_norm": 0.3578769266605377, "learning_rate": 1.1011069244775996e-06, "loss": 0.3302, "step": 5200 }, { "epoch": 2.4186947760037203, "grad_norm": 0.3215552866458893, "learning_rate": 1.0994134292736307e-06, "loss": 0.2755, "step": 5201 }, { "epoch": 2.419159820182917, "grad_norm": 0.3483463227748871, "learning_rate": 1.0977210764881124e-06, "loss": 0.3649, "step": 5202 }, { "epoch": 2.4196248643621145, "grad_norm": 0.32858699560165405, "learning_rate": 1.096029866616704e-06, "loss": 0.3099, "step": 5203 }, { "epoch": 2.4200899085413115, "grad_norm": 0.34919896721839905, "learning_rate": 1.0943398001547362e-06, "loss": 0.3519, "step": 5204 }, { "epoch": 2.4205549527205084, "grad_norm": 0.3417411148548126, "learning_rate": 1.0926508775971995e-06, "loss": 0.301, "step": 5205 }, { "epoch": 2.4210199968997053, "grad_norm": 0.3448435068130493, "learning_rate": 1.0909630994387538e-06, "loss": 0.319, "step": 5206 }, { "epoch": 2.4214850410789026, "grad_norm": 0.31199708580970764, "learning_rate": 1.0892764661737204e-06, "loss": 0.2935, "step": 5207 }, { "epoch": 2.4219500852580995, "grad_norm": 0.33110520243644714, "learning_rate": 1.0875909782960887e-06, "loss": 0.3122, "step": 5208 }, { "epoch": 2.4224151294372964, "grad_norm": 0.3433871865272522, "learning_rate": 1.0859066362995085e-06, "loss": 0.2962, "step": 5209 }, { "epoch": 2.4228801736164938, "grad_norm": 0.34019917249679565, "learning_rate": 1.0842234406772973e-06, "loss": 0.3437, "step": 5210 }, { "epoch": 2.4233452177956907, "grad_norm": 0.3699561655521393, "learning_rate": 1.0825413919224353e-06, "loss": 0.3118, "step": 5211 }, { "epoch": 2.4238102619748876, "grad_norm": 0.364387184381485, "learning_rate": 1.0808604905275693e-06, "loss": 0.3258, "step": 5212 }, { "epoch": 2.4242753061540845, "grad_norm": 0.35317301750183105, "learning_rate": 1.0791807369850048e-06, "loss": 0.3585, "step": 5213 }, { "epoch": 2.424740350333282, "grad_norm": 0.341281533241272, "learning_rate": 1.077502131786718e-06, "loss": 0.3319, "step": 5214 }, { "epoch": 2.4252053945124787, "grad_norm": 0.3004712164402008, "learning_rate": 1.0758246754243412e-06, "loss": 0.2823, "step": 5215 }, { "epoch": 2.4256704386916756, "grad_norm": 0.34995582699775696, "learning_rate": 1.0741483683891773e-06, "loss": 0.2991, "step": 5216 }, { "epoch": 2.4261354828708726, "grad_norm": 0.3658967912197113, "learning_rate": 1.072473211172187e-06, "loss": 0.3243, "step": 5217 }, { "epoch": 2.42660052705007, "grad_norm": 0.3104728162288666, "learning_rate": 1.0707992042639986e-06, "loss": 0.3104, "step": 5218 }, { "epoch": 2.427065571229267, "grad_norm": 0.3573016822338104, "learning_rate": 1.0691263481548996e-06, "loss": 0.3329, "step": 5219 }, { "epoch": 2.4275306154084637, "grad_norm": 0.31468331813812256, "learning_rate": 1.0674546433348453e-06, "loss": 0.2965, "step": 5220 }, { "epoch": 2.427995659587661, "grad_norm": 0.3958417773246765, "learning_rate": 1.0657840902934469e-06, "loss": 0.3601, "step": 5221 }, { "epoch": 2.428460703766858, "grad_norm": 0.3233220875263214, "learning_rate": 1.064114689519985e-06, "loss": 0.3139, "step": 5222 }, { "epoch": 2.428925747946055, "grad_norm": 0.3252237141132355, "learning_rate": 1.0624464415033987e-06, "loss": 0.3017, "step": 5223 }, { "epoch": 2.4293907921252518, "grad_norm": 0.32925140857696533, "learning_rate": 1.060779346732293e-06, "loss": 0.3318, "step": 5224 }, { "epoch": 2.4298558363044487, "grad_norm": 0.3464730978012085, "learning_rate": 1.0591134056949314e-06, "loss": 0.3449, "step": 5225 }, { "epoch": 2.430320880483646, "grad_norm": 0.3610069155693054, "learning_rate": 1.0574486188792393e-06, "loss": 0.3089, "step": 5226 }, { "epoch": 2.430785924662843, "grad_norm": 0.33958742022514343, "learning_rate": 1.0557849867728088e-06, "loss": 0.3386, "step": 5227 }, { "epoch": 2.43125096884204, "grad_norm": 0.3290470540523529, "learning_rate": 1.0541225098628877e-06, "loss": 0.3015, "step": 5228 }, { "epoch": 2.431716013021237, "grad_norm": 0.32628098130226135, "learning_rate": 1.0524611886363912e-06, "loss": 0.3187, "step": 5229 }, { "epoch": 2.432181057200434, "grad_norm": 0.33828026056289673, "learning_rate": 1.0508010235798904e-06, "loss": 0.3105, "step": 5230 }, { "epoch": 2.432646101379631, "grad_norm": 0.33995717763900757, "learning_rate": 1.0491420151796227e-06, "loss": 0.3409, "step": 5231 }, { "epoch": 2.433111145558828, "grad_norm": 0.30739811062812805, "learning_rate": 1.047484163921486e-06, "loss": 0.3109, "step": 5232 }, { "epoch": 2.4335761897380253, "grad_norm": 0.32465070486068726, "learning_rate": 1.0458274702910347e-06, "loss": 0.299, "step": 5233 }, { "epoch": 2.434041233917222, "grad_norm": 0.36253416538238525, "learning_rate": 1.044171934773489e-06, "loss": 0.3325, "step": 5234 }, { "epoch": 2.434506278096419, "grad_norm": 0.31193816661834717, "learning_rate": 1.04251755785373e-06, "loss": 0.3138, "step": 5235 }, { "epoch": 2.434971322275616, "grad_norm": 0.3374314308166504, "learning_rate": 1.0408643400162949e-06, "loss": 0.3161, "step": 5236 }, { "epoch": 2.4354363664548133, "grad_norm": 0.34814929962158203, "learning_rate": 1.039212281745387e-06, "loss": 0.3192, "step": 5237 }, { "epoch": 2.4359014106340102, "grad_norm": 0.3480595648288727, "learning_rate": 1.0375613835248648e-06, "loss": 0.3186, "step": 5238 }, { "epoch": 2.436366454813207, "grad_norm": 0.35934314131736755, "learning_rate": 1.0359116458382523e-06, "loss": 0.3206, "step": 5239 }, { "epoch": 2.4368314989924045, "grad_norm": 0.34923362731933594, "learning_rate": 1.0342630691687283e-06, "loss": 0.3336, "step": 5240 }, { "epoch": 2.4372965431716014, "grad_norm": 0.3344292640686035, "learning_rate": 1.0326156539991361e-06, "loss": 0.329, "step": 5241 }, { "epoch": 2.4377615873507983, "grad_norm": 0.34684503078460693, "learning_rate": 1.0309694008119748e-06, "loss": 0.3105, "step": 5242 }, { "epoch": 2.438226631529995, "grad_norm": 0.32323524355888367, "learning_rate": 1.0293243100894068e-06, "loss": 0.3101, "step": 5243 }, { "epoch": 2.4386916757091925, "grad_norm": 0.3490697741508484, "learning_rate": 1.027680382313253e-06, "loss": 0.3156, "step": 5244 }, { "epoch": 2.4391567198883894, "grad_norm": 0.33271536231040955, "learning_rate": 1.0260376179649905e-06, "loss": 0.3069, "step": 5245 }, { "epoch": 2.4396217640675864, "grad_norm": 0.31948867440223694, "learning_rate": 1.0243960175257605e-06, "loss": 0.3263, "step": 5246 }, { "epoch": 2.4400868082467833, "grad_norm": 0.3256017863750458, "learning_rate": 1.0227555814763623e-06, "loss": 0.3457, "step": 5247 }, { "epoch": 2.4405518524259806, "grad_norm": 0.31808292865753174, "learning_rate": 1.0211163102972494e-06, "loss": 0.3109, "step": 5248 }, { "epoch": 2.4410168966051775, "grad_norm": 0.33860480785369873, "learning_rate": 1.0194782044685414e-06, "loss": 0.3262, "step": 5249 }, { "epoch": 2.4414819407843744, "grad_norm": 0.3586934506893158, "learning_rate": 1.0178412644700093e-06, "loss": 0.3337, "step": 5250 }, { "epoch": 2.4419469849635718, "grad_norm": 0.31557610630989075, "learning_rate": 1.0162054907810892e-06, "loss": 0.2812, "step": 5251 }, { "epoch": 2.4424120291427687, "grad_norm": 0.3515093922615051, "learning_rate": 1.0145708838808704e-06, "loss": 0.3759, "step": 5252 }, { "epoch": 2.4428770733219656, "grad_norm": 0.3308696448802948, "learning_rate": 1.012937444248105e-06, "loss": 0.2839, "step": 5253 }, { "epoch": 2.4433421175011625, "grad_norm": 0.35928985476493835, "learning_rate": 1.0113051723611989e-06, "loss": 0.3185, "step": 5254 }, { "epoch": 2.4438071616803594, "grad_norm": 0.3398415744304657, "learning_rate": 1.0096740686982192e-06, "loss": 0.295, "step": 5255 }, { "epoch": 2.4442722058595567, "grad_norm": 0.3303294777870178, "learning_rate": 1.0080441337368884e-06, "loss": 0.3409, "step": 5256 }, { "epoch": 2.4447372500387536, "grad_norm": 0.33345457911491394, "learning_rate": 1.0064153679545891e-06, "loss": 0.3565, "step": 5257 }, { "epoch": 2.4452022942179505, "grad_norm": 0.30849790573120117, "learning_rate": 1.00478777182836e-06, "loss": 0.3075, "step": 5258 }, { "epoch": 2.445667338397148, "grad_norm": 0.3234153091907501, "learning_rate": 1.0031613458348988e-06, "loss": 0.2985, "step": 5259 }, { "epoch": 2.446132382576345, "grad_norm": 0.3142699897289276, "learning_rate": 1.0015360904505573e-06, "loss": 0.2921, "step": 5260 }, { "epoch": 2.4465974267555417, "grad_norm": 0.36145663261413574, "learning_rate": 9.99912006151348e-07, "loss": 0.3293, "step": 5261 }, { "epoch": 2.4470624709347386, "grad_norm": 0.354502409696579, "learning_rate": 9.98289093412938e-07, "loss": 0.3246, "step": 5262 }, { "epoch": 2.447527515113936, "grad_norm": 0.36151012778282166, "learning_rate": 9.966673527106514e-07, "loss": 0.3153, "step": 5263 }, { "epoch": 2.447992559293133, "grad_norm": 0.3437816798686981, "learning_rate": 9.950467845194712e-07, "loss": 0.3397, "step": 5264 }, { "epoch": 2.4484576034723298, "grad_norm": 0.335560142993927, "learning_rate": 9.934273893140335e-07, "loss": 0.3223, "step": 5265 }, { "epoch": 2.4489226476515267, "grad_norm": 0.33611413836479187, "learning_rate": 9.918091675686343e-07, "loss": 0.3288, "step": 5266 }, { "epoch": 2.449387691830724, "grad_norm": 0.3385131359100342, "learning_rate": 9.90192119757225e-07, "loss": 0.3153, "step": 5267 }, { "epoch": 2.449852736009921, "grad_norm": 0.35654857754707336, "learning_rate": 9.88576246353411e-07, "loss": 0.2852, "step": 5268 }, { "epoch": 2.450317780189118, "grad_norm": 0.3671344220638275, "learning_rate": 9.869615478304567e-07, "loss": 0.3212, "step": 5269 }, { "epoch": 2.450782824368315, "grad_norm": 0.33951297402381897, "learning_rate": 9.853480246612812e-07, "loss": 0.3201, "step": 5270 }, { "epoch": 2.451247868547512, "grad_norm": 0.34729960560798645, "learning_rate": 9.837356773184576e-07, "loss": 0.3244, "step": 5271 }, { "epoch": 2.451712912726709, "grad_norm": 0.3578476011753082, "learning_rate": 9.821245062742191e-07, "loss": 0.2941, "step": 5272 }, { "epoch": 2.452177956905906, "grad_norm": 0.3414701521396637, "learning_rate": 9.805145120004478e-07, "loss": 0.3043, "step": 5273 }, { "epoch": 2.4526430010851032, "grad_norm": 0.30916067957878113, "learning_rate": 9.789056949686882e-07, "loss": 0.299, "step": 5274 }, { "epoch": 2.4531080452643, "grad_norm": 0.3154885470867157, "learning_rate": 9.772980556501338e-07, "loss": 0.313, "step": 5275 }, { "epoch": 2.453573089443497, "grad_norm": 0.3455050587654114, "learning_rate": 9.756915945156392e-07, "loss": 0.3221, "step": 5276 }, { "epoch": 2.454038133622694, "grad_norm": 0.3348415195941925, "learning_rate": 9.74086312035708e-07, "loss": 0.312, "step": 5277 }, { "epoch": 2.4545031778018913, "grad_norm": 0.34347227215766907, "learning_rate": 9.724822086805019e-07, "loss": 0.3398, "step": 5278 }, { "epoch": 2.454968221981088, "grad_norm": 0.340069055557251, "learning_rate": 9.70879284919839e-07, "loss": 0.3009, "step": 5279 }, { "epoch": 2.455433266160285, "grad_norm": 0.3648471236228943, "learning_rate": 9.692775412231863e-07, "loss": 0.3182, "step": 5280 }, { "epoch": 2.4558983103394825, "grad_norm": 0.33610066771507263, "learning_rate": 9.6767697805967e-07, "loss": 0.303, "step": 5281 }, { "epoch": 2.4563633545186794, "grad_norm": 0.3398694694042206, "learning_rate": 9.660775958980712e-07, "loss": 0.3386, "step": 5282 }, { "epoch": 2.4568283986978763, "grad_norm": 0.32278192043304443, "learning_rate": 9.644793952068187e-07, "loss": 0.2896, "step": 5283 }, { "epoch": 2.457293442877073, "grad_norm": 0.3328745365142822, "learning_rate": 9.628823764540035e-07, "loss": 0.303, "step": 5284 }, { "epoch": 2.4577584870562705, "grad_norm": 0.3310834765434265, "learning_rate": 9.612865401073634e-07, "loss": 0.3166, "step": 5285 }, { "epoch": 2.4582235312354674, "grad_norm": 0.43221089243888855, "learning_rate": 9.596918866342959e-07, "loss": 0.3503, "step": 5286 }, { "epoch": 2.4586885754146643, "grad_norm": 0.36144617199897766, "learning_rate": 9.580984165018458e-07, "loss": 0.3356, "step": 5287 }, { "epoch": 2.4591536195938613, "grad_norm": 0.3269920349121094, "learning_rate": 9.565061301767176e-07, "loss": 0.3237, "step": 5288 }, { "epoch": 2.4596186637730586, "grad_norm": 0.32409968972206116, "learning_rate": 9.549150281252633e-07, "loss": 0.3154, "step": 5289 }, { "epoch": 2.4600837079522555, "grad_norm": 0.3190405070781708, "learning_rate": 9.533251108134922e-07, "loss": 0.3, "step": 5290 }, { "epoch": 2.4605487521314524, "grad_norm": 0.3245762884616852, "learning_rate": 9.517363787070672e-07, "loss": 0.3034, "step": 5291 }, { "epoch": 2.4610137963106498, "grad_norm": 0.32275402545928955, "learning_rate": 9.501488322712987e-07, "loss": 0.3172, "step": 5292 }, { "epoch": 2.4614788404898467, "grad_norm": 0.33684971928596497, "learning_rate": 9.485624719711551e-07, "loss": 0.3347, "step": 5293 }, { "epoch": 2.4619438846690436, "grad_norm": 0.35810065269470215, "learning_rate": 9.469772982712561e-07, "loss": 0.3018, "step": 5294 }, { "epoch": 2.4624089288482405, "grad_norm": 0.34780067205429077, "learning_rate": 9.453933116358715e-07, "loss": 0.3157, "step": 5295 }, { "epoch": 2.4628739730274374, "grad_norm": 0.36027947068214417, "learning_rate": 9.438105125289276e-07, "loss": 0.3516, "step": 5296 }, { "epoch": 2.4633390172066347, "grad_norm": 0.317049503326416, "learning_rate": 9.422289014139996e-07, "loss": 0.2994, "step": 5297 }, { "epoch": 2.4638040613858316, "grad_norm": 0.3178118169307709, "learning_rate": 9.406484787543136e-07, "loss": 0.3013, "step": 5298 }, { "epoch": 2.4642691055650285, "grad_norm": 0.3206396698951721, "learning_rate": 9.390692450127531e-07, "loss": 0.3182, "step": 5299 }, { "epoch": 2.464734149744226, "grad_norm": 0.35063356161117554, "learning_rate": 9.374912006518467e-07, "loss": 0.3607, "step": 5300 }, { "epoch": 2.465199193923423, "grad_norm": 0.35182520747184753, "learning_rate": 9.359143461337799e-07, "loss": 0.316, "step": 5301 }, { "epoch": 2.4656642381026197, "grad_norm": 0.3166978359222412, "learning_rate": 9.343386819203892e-07, "loss": 0.2737, "step": 5302 }, { "epoch": 2.4661292822818166, "grad_norm": 0.3476794362068176, "learning_rate": 9.327642084731575e-07, "loss": 0.3288, "step": 5303 }, { "epoch": 2.466594326461014, "grad_norm": 0.33571144938468933, "learning_rate": 9.311909262532248e-07, "loss": 0.319, "step": 5304 }, { "epoch": 2.467059370640211, "grad_norm": 0.3019503355026245, "learning_rate": 9.296188357213804e-07, "loss": 0.2941, "step": 5305 }, { "epoch": 2.4675244148194078, "grad_norm": 0.37272417545318604, "learning_rate": 9.280479373380624e-07, "loss": 0.3377, "step": 5306 }, { "epoch": 2.4679894589986047, "grad_norm": 0.30881267786026, "learning_rate": 9.26478231563363e-07, "loss": 0.3139, "step": 5307 }, { "epoch": 2.468454503177802, "grad_norm": 0.32374900579452515, "learning_rate": 9.249097188570216e-07, "loss": 0.3063, "step": 5308 }, { "epoch": 2.468919547356999, "grad_norm": 0.3490180969238281, "learning_rate": 9.23342399678433e-07, "loss": 0.3599, "step": 5309 }, { "epoch": 2.469384591536196, "grad_norm": 0.34069767594337463, "learning_rate": 9.21776274486636e-07, "loss": 0.3225, "step": 5310 }, { "epoch": 2.469849635715393, "grad_norm": 0.32864606380462646, "learning_rate": 9.202113437403259e-07, "loss": 0.3247, "step": 5311 }, { "epoch": 2.47031467989459, "grad_norm": 0.3595259487628937, "learning_rate": 9.18647607897844e-07, "loss": 0.3265, "step": 5312 }, { "epoch": 2.470779724073787, "grad_norm": 0.3525889217853546, "learning_rate": 9.170850674171833e-07, "loss": 0.3167, "step": 5313 }, { "epoch": 2.471244768252984, "grad_norm": 0.3449327051639557, "learning_rate": 9.155237227559883e-07, "loss": 0.3431, "step": 5314 }, { "epoch": 2.4717098124321812, "grad_norm": 0.3054923117160797, "learning_rate": 9.139635743715486e-07, "loss": 0.2732, "step": 5315 }, { "epoch": 2.472174856611378, "grad_norm": 0.3427397310733795, "learning_rate": 9.124046227208083e-07, "loss": 0.3412, "step": 5316 }, { "epoch": 2.472639900790575, "grad_norm": 0.34003254771232605, "learning_rate": 9.108468682603594e-07, "loss": 0.3276, "step": 5317 }, { "epoch": 2.473104944969772, "grad_norm": 0.31388726830482483, "learning_rate": 9.092903114464407e-07, "loss": 0.3003, "step": 5318 }, { "epoch": 2.4735699891489693, "grad_norm": 0.31117507815361023, "learning_rate": 9.077349527349455e-07, "loss": 0.3139, "step": 5319 }, { "epoch": 2.474035033328166, "grad_norm": 0.32882410287857056, "learning_rate": 9.061807925814098e-07, "loss": 0.3111, "step": 5320 }, { "epoch": 2.474500077507363, "grad_norm": 0.33620962500572205, "learning_rate": 9.046278314410245e-07, "loss": 0.3183, "step": 5321 }, { "epoch": 2.4749651216865605, "grad_norm": 0.3354683518409729, "learning_rate": 9.030760697686247e-07, "loss": 0.3094, "step": 5322 }, { "epoch": 2.4754301658657574, "grad_norm": 0.3122057616710663, "learning_rate": 9.01525508018698e-07, "loss": 0.3064, "step": 5323 }, { "epoch": 2.4758952100449543, "grad_norm": 0.35190722346305847, "learning_rate": 8.999761466453771e-07, "loss": 0.351, "step": 5324 }, { "epoch": 2.476360254224151, "grad_norm": 0.3170509338378906, "learning_rate": 8.984279861024453e-07, "loss": 0.3036, "step": 5325 }, { "epoch": 2.476825298403348, "grad_norm": 0.33579573035240173, "learning_rate": 8.968810268433347e-07, "loss": 0.3135, "step": 5326 }, { "epoch": 2.4772903425825454, "grad_norm": 0.35341697931289673, "learning_rate": 8.953352693211232e-07, "loss": 0.312, "step": 5327 }, { "epoch": 2.4777553867617423, "grad_norm": 0.32664796710014343, "learning_rate": 8.937907139885376e-07, "loss": 0.2932, "step": 5328 }, { "epoch": 2.4782204309409392, "grad_norm": 0.3640941083431244, "learning_rate": 8.922473612979565e-07, "loss": 0.3478, "step": 5329 }, { "epoch": 2.4786854751201366, "grad_norm": 0.30336251854896545, "learning_rate": 8.907052117013981e-07, "loss": 0.2738, "step": 5330 }, { "epoch": 2.4791505192993335, "grad_norm": 0.36704903841018677, "learning_rate": 8.891642656505373e-07, "loss": 0.3298, "step": 5331 }, { "epoch": 2.4796155634785304, "grad_norm": 0.3368130624294281, "learning_rate": 8.876245235966884e-07, "loss": 0.295, "step": 5332 }, { "epoch": 2.4800806076577273, "grad_norm": 0.3467327058315277, "learning_rate": 8.860859859908199e-07, "loss": 0.3234, "step": 5333 }, { "epoch": 2.4805456518369247, "grad_norm": 0.3419296145439148, "learning_rate": 8.845486532835435e-07, "loss": 0.3423, "step": 5334 }, { "epoch": 2.4810106960161216, "grad_norm": 0.3196740448474884, "learning_rate": 8.830125259251171e-07, "loss": 0.3017, "step": 5335 }, { "epoch": 2.4814757401953185, "grad_norm": 0.3675239086151123, "learning_rate": 8.814776043654494e-07, "loss": 0.3654, "step": 5336 }, { "epoch": 2.4819407843745154, "grad_norm": 0.3341238796710968, "learning_rate": 8.799438890540929e-07, "loss": 0.2776, "step": 5337 }, { "epoch": 2.4824058285537127, "grad_norm": 0.3249601125717163, "learning_rate": 8.784113804402506e-07, "loss": 0.3288, "step": 5338 }, { "epoch": 2.4828708727329096, "grad_norm": 0.3380821645259857, "learning_rate": 8.768800789727655e-07, "loss": 0.3253, "step": 5339 }, { "epoch": 2.4833359169121065, "grad_norm": 0.3296220302581787, "learning_rate": 8.753499851001341e-07, "loss": 0.2792, "step": 5340 }, { "epoch": 2.483800961091304, "grad_norm": 0.3442380428314209, "learning_rate": 8.738210992704937e-07, "loss": 0.336, "step": 5341 }, { "epoch": 2.484266005270501, "grad_norm": 0.3112342059612274, "learning_rate": 8.72293421931632e-07, "loss": 0.3206, "step": 5342 }, { "epoch": 2.4847310494496977, "grad_norm": 0.3362886905670166, "learning_rate": 8.707669535309793e-07, "loss": 0.3584, "step": 5343 }, { "epoch": 2.4851960936288946, "grad_norm": 0.3538823127746582, "learning_rate": 8.692416945156151e-07, "loss": 0.3174, "step": 5344 }, { "epoch": 2.485661137808092, "grad_norm": 0.33575835824012756, "learning_rate": 8.677176453322611e-07, "loss": 0.295, "step": 5345 }, { "epoch": 2.486126181987289, "grad_norm": 0.3484673500061035, "learning_rate": 8.66194806427288e-07, "loss": 0.3196, "step": 5346 }, { "epoch": 2.4865912261664858, "grad_norm": 0.36958253383636475, "learning_rate": 8.646731782467094e-07, "loss": 0.3701, "step": 5347 }, { "epoch": 2.4870562703456827, "grad_norm": 0.31938305497169495, "learning_rate": 8.631527612361861e-07, "loss": 0.2928, "step": 5348 }, { "epoch": 2.48752131452488, "grad_norm": 0.33608078956604004, "learning_rate": 8.616335558410244e-07, "loss": 0.3191, "step": 5349 }, { "epoch": 2.487986358704077, "grad_norm": 0.31729990243911743, "learning_rate": 8.601155625061736e-07, "loss": 0.2873, "step": 5350 }, { "epoch": 2.488451402883274, "grad_norm": 0.35453861951828003, "learning_rate": 8.585987816762292e-07, "loss": 0.3096, "step": 5351 }, { "epoch": 2.488916447062471, "grad_norm": 0.3638750910758972, "learning_rate": 8.570832137954333e-07, "loss": 0.3396, "step": 5352 }, { "epoch": 2.489381491241668, "grad_norm": 0.35378140211105347, "learning_rate": 8.555688593076689e-07, "loss": 0.3485, "step": 5353 }, { "epoch": 2.489846535420865, "grad_norm": 0.34362515807151794, "learning_rate": 8.540557186564685e-07, "loss": 0.2998, "step": 5354 }, { "epoch": 2.490311579600062, "grad_norm": 0.33936387300491333, "learning_rate": 8.525437922850033e-07, "loss": 0.3127, "step": 5355 }, { "epoch": 2.490776623779259, "grad_norm": 0.3667736053466797, "learning_rate": 8.51033080636095e-07, "loss": 0.3394, "step": 5356 }, { "epoch": 2.491241667958456, "grad_norm": 0.3014102578163147, "learning_rate": 8.495235841522038e-07, "loss": 0.2753, "step": 5357 }, { "epoch": 2.491706712137653, "grad_norm": 0.31866636872291565, "learning_rate": 8.480153032754396e-07, "loss": 0.3203, "step": 5358 }, { "epoch": 2.49217175631685, "grad_norm": 0.34170639514923096, "learning_rate": 8.465082384475499e-07, "loss": 0.3131, "step": 5359 }, { "epoch": 2.4926368004960473, "grad_norm": 0.31605064868927, "learning_rate": 8.450023901099314e-07, "loss": 0.3298, "step": 5360 }, { "epoch": 2.493101844675244, "grad_norm": 0.336970716714859, "learning_rate": 8.434977587036242e-07, "loss": 0.3574, "step": 5361 }, { "epoch": 2.493566888854441, "grad_norm": 0.3367874324321747, "learning_rate": 8.419943446693069e-07, "loss": 0.3065, "step": 5362 }, { "epoch": 2.494031933033638, "grad_norm": 0.34468874335289, "learning_rate": 8.404921484473072e-07, "loss": 0.3462, "step": 5363 }, { "epoch": 2.4944969772128354, "grad_norm": 0.30258965492248535, "learning_rate": 8.38991170477595e-07, "loss": 0.2844, "step": 5364 }, { "epoch": 2.4949620213920323, "grad_norm": 0.32411083579063416, "learning_rate": 8.3749141119978e-07, "loss": 0.3055, "step": 5365 }, { "epoch": 2.495427065571229, "grad_norm": 0.33687278628349304, "learning_rate": 8.359928710531195e-07, "loss": 0.3058, "step": 5366 }, { "epoch": 2.495892109750426, "grad_norm": 0.33974799513816833, "learning_rate": 8.344955504765089e-07, "loss": 0.3166, "step": 5367 }, { "epoch": 2.4963571539296234, "grad_norm": 0.3623729646205902, "learning_rate": 8.32999449908492e-07, "loss": 0.3154, "step": 5368 }, { "epoch": 2.4968221981088203, "grad_norm": 0.36364513635635376, "learning_rate": 8.315045697872514e-07, "loss": 0.3118, "step": 5369 }, { "epoch": 2.4972872422880172, "grad_norm": 0.32118678092956543, "learning_rate": 8.30010910550611e-07, "loss": 0.3173, "step": 5370 }, { "epoch": 2.4977522864672146, "grad_norm": 0.329588383436203, "learning_rate": 8.285184726360412e-07, "loss": 0.3409, "step": 5371 }, { "epoch": 2.4982173306464115, "grad_norm": 0.29638898372650146, "learning_rate": 8.27027256480653e-07, "loss": 0.2994, "step": 5372 }, { "epoch": 2.4986823748256084, "grad_norm": 0.3159889578819275, "learning_rate": 8.255372625212005e-07, "loss": 0.3076, "step": 5373 }, { "epoch": 2.4991474190048053, "grad_norm": 0.3349873423576355, "learning_rate": 8.240484911940755e-07, "loss": 0.3355, "step": 5374 }, { "epoch": 2.4996124631840027, "grad_norm": 0.34741565585136414, "learning_rate": 8.225609429353187e-07, "loss": 0.3413, "step": 5375 }, { "epoch": 2.5000775073631996, "grad_norm": 0.3568498492240906, "learning_rate": 8.210746181806051e-07, "loss": 0.3315, "step": 5376 }, { "epoch": 2.5005425515423965, "grad_norm": 0.3185107409954071, "learning_rate": 8.195895173652585e-07, "loss": 0.2859, "step": 5377 }, { "epoch": 2.5010075957215934, "grad_norm": 0.32899004220962524, "learning_rate": 8.181056409242377e-07, "loss": 0.3093, "step": 5378 }, { "epoch": 2.5014726399007907, "grad_norm": 0.30876079201698303, "learning_rate": 8.16622989292149e-07, "loss": 0.3068, "step": 5379 }, { "epoch": 2.5019376840799876, "grad_norm": 0.31439492106437683, "learning_rate": 8.151415629032338e-07, "loss": 0.3405, "step": 5380 }, { "epoch": 2.5024027282591845, "grad_norm": 0.3182373344898224, "learning_rate": 8.136613621913813e-07, "loss": 0.3143, "step": 5381 }, { "epoch": 2.502867772438382, "grad_norm": 0.3052506744861603, "learning_rate": 8.121823875901152e-07, "loss": 0.32, "step": 5382 }, { "epoch": 2.503332816617579, "grad_norm": 0.33077898621559143, "learning_rate": 8.107046395326041e-07, "loss": 0.3325, "step": 5383 }, { "epoch": 2.5037978607967757, "grad_norm": 0.31627658009529114, "learning_rate": 8.092281184516571e-07, "loss": 0.2717, "step": 5384 }, { "epoch": 2.5042629049759726, "grad_norm": 0.34410303831100464, "learning_rate": 8.077528247797234e-07, "loss": 0.3265, "step": 5385 }, { "epoch": 2.5047279491551695, "grad_norm": 0.3441540002822876, "learning_rate": 8.062787589488913e-07, "loss": 0.3192, "step": 5386 }, { "epoch": 2.505192993334367, "grad_norm": 0.3462598919868469, "learning_rate": 8.048059213908927e-07, "loss": 0.3397, "step": 5387 }, { "epoch": 2.5056580375135638, "grad_norm": 0.302598774433136, "learning_rate": 8.033343125370952e-07, "loss": 0.2927, "step": 5388 }, { "epoch": 2.5061230816927607, "grad_norm": 0.33706727623939514, "learning_rate": 8.018639328185113e-07, "loss": 0.3264, "step": 5389 }, { "epoch": 2.506588125871958, "grad_norm": 0.3481765687465668, "learning_rate": 8.003947826657898e-07, "loss": 0.3415, "step": 5390 }, { "epoch": 2.507053170051155, "grad_norm": 0.31380075216293335, "learning_rate": 7.989268625092223e-07, "loss": 0.2639, "step": 5391 }, { "epoch": 2.507518214230352, "grad_norm": 0.34795790910720825, "learning_rate": 7.974601727787374e-07, "loss": 0.3427, "step": 5392 }, { "epoch": 2.507983258409549, "grad_norm": 0.32209163904190063, "learning_rate": 7.959947139039065e-07, "loss": 0.3024, "step": 5393 }, { "epoch": 2.508448302588746, "grad_norm": 0.3540125787258148, "learning_rate": 7.945304863139358e-07, "loss": 0.3565, "step": 5394 }, { "epoch": 2.508913346767943, "grad_norm": 0.37601977586746216, "learning_rate": 7.930674904376762e-07, "loss": 0.3115, "step": 5395 }, { "epoch": 2.50937839094714, "grad_norm": 0.33575406670570374, "learning_rate": 7.916057267036159e-07, "loss": 0.3297, "step": 5396 }, { "epoch": 2.509843435126337, "grad_norm": 0.32133379578590393, "learning_rate": 7.901451955398792e-07, "loss": 0.3136, "step": 5397 }, { "epoch": 2.510308479305534, "grad_norm": 0.3117270767688751, "learning_rate": 7.886858973742334e-07, "loss": 0.2933, "step": 5398 }, { "epoch": 2.510773523484731, "grad_norm": 0.3320932984352112, "learning_rate": 7.872278326340849e-07, "loss": 0.3404, "step": 5399 }, { "epoch": 2.511238567663928, "grad_norm": 0.3289964497089386, "learning_rate": 7.857710017464737e-07, "loss": 0.3172, "step": 5400 }, { "epoch": 2.5117036118431253, "grad_norm": 0.32881319522857666, "learning_rate": 7.843154051380852e-07, "loss": 0.3287, "step": 5401 }, { "epoch": 2.512168656022322, "grad_norm": 0.3330029547214508, "learning_rate": 7.828610432352373e-07, "loss": 0.2865, "step": 5402 }, { "epoch": 2.512633700201519, "grad_norm": 0.3702026307582855, "learning_rate": 7.814079164638915e-07, "loss": 0.3376, "step": 5403 }, { "epoch": 2.513098744380716, "grad_norm": 0.3574296832084656, "learning_rate": 7.799560252496424e-07, "loss": 0.3269, "step": 5404 }, { "epoch": 2.513563788559913, "grad_norm": 0.3529759645462036, "learning_rate": 7.785053700177275e-07, "loss": 0.3205, "step": 5405 }, { "epoch": 2.5140288327391103, "grad_norm": 0.33158957958221436, "learning_rate": 7.770559511930187e-07, "loss": 0.3341, "step": 5406 }, { "epoch": 2.514493876918307, "grad_norm": 0.3311402499675751, "learning_rate": 7.756077692000274e-07, "loss": 0.3263, "step": 5407 }, { "epoch": 2.514958921097504, "grad_norm": 0.377869188785553, "learning_rate": 7.741608244629045e-07, "loss": 0.3237, "step": 5408 }, { "epoch": 2.5154239652767014, "grad_norm": 0.3278749883174896, "learning_rate": 7.727151174054342e-07, "loss": 0.3334, "step": 5409 }, { "epoch": 2.5158890094558983, "grad_norm": 0.34262946248054504, "learning_rate": 7.712706484510424e-07, "loss": 0.332, "step": 5410 }, { "epoch": 2.5163540536350952, "grad_norm": 0.32814016938209534, "learning_rate": 7.698274180227888e-07, "loss": 0.2826, "step": 5411 }, { "epoch": 2.5168190978142926, "grad_norm": 0.3465859293937683, "learning_rate": 7.683854265433737e-07, "loss": 0.3271, "step": 5412 }, { "epoch": 2.5172841419934895, "grad_norm": 0.33419957756996155, "learning_rate": 7.669446744351317e-07, "loss": 0.2999, "step": 5413 }, { "epoch": 2.5177491861726864, "grad_norm": 0.3603866994380951, "learning_rate": 7.655051621200377e-07, "loss": 0.3173, "step": 5414 }, { "epoch": 2.5182142303518833, "grad_norm": 0.33212265372276306, "learning_rate": 7.640668900196985e-07, "loss": 0.292, "step": 5415 }, { "epoch": 2.51867927453108, "grad_norm": 0.35766664147377014, "learning_rate": 7.626298585553637e-07, "loss": 0.3385, "step": 5416 }, { "epoch": 2.5191443187102776, "grad_norm": 0.35297131538391113, "learning_rate": 7.611940681479141e-07, "loss": 0.3162, "step": 5417 }, { "epoch": 2.5196093628894745, "grad_norm": 0.3046947419643402, "learning_rate": 7.597595192178702e-07, "loss": 0.2764, "step": 5418 }, { "epoch": 2.5200744070686714, "grad_norm": 0.38086947798728943, "learning_rate": 7.583262121853879e-07, "loss": 0.3521, "step": 5419 }, { "epoch": 2.5205394512478687, "grad_norm": 0.357149600982666, "learning_rate": 7.568941474702618e-07, "loss": 0.3225, "step": 5420 }, { "epoch": 2.5210044954270656, "grad_norm": 0.32697343826293945, "learning_rate": 7.554633254919169e-07, "loss": 0.3138, "step": 5421 }, { "epoch": 2.5214695396062625, "grad_norm": 0.3249689042568207, "learning_rate": 7.540337466694203e-07, "loss": 0.3149, "step": 5422 }, { "epoch": 2.52193458378546, "grad_norm": 0.3241865634918213, "learning_rate": 7.526054114214704e-07, "loss": 0.3102, "step": 5423 }, { "epoch": 2.5223996279646568, "grad_norm": 0.33546963334083557, "learning_rate": 7.511783201664053e-07, "loss": 0.34, "step": 5424 }, { "epoch": 2.5228646721438537, "grad_norm": 0.3344916105270386, "learning_rate": 7.49752473322195e-07, "loss": 0.3078, "step": 5425 }, { "epoch": 2.5233297163230506, "grad_norm": 0.3365899622440338, "learning_rate": 7.48327871306449e-07, "loss": 0.3172, "step": 5426 }, { "epoch": 2.5237947605022475, "grad_norm": 0.3597327768802643, "learning_rate": 7.469045145364079e-07, "loss": 0.3203, "step": 5427 }, { "epoch": 2.524259804681445, "grad_norm": 0.3610391914844513, "learning_rate": 7.454824034289515e-07, "loss": 0.2926, "step": 5428 }, { "epoch": 2.5247248488606417, "grad_norm": 0.33750978112220764, "learning_rate": 7.440615384005917e-07, "loss": 0.288, "step": 5429 }, { "epoch": 2.5251898930398387, "grad_norm": 0.35543230175971985, "learning_rate": 7.426419198674773e-07, "loss": 0.3081, "step": 5430 }, { "epoch": 2.525654937219036, "grad_norm": 0.3200836777687073, "learning_rate": 7.412235482453911e-07, "loss": 0.3226, "step": 5431 }, { "epoch": 2.526119981398233, "grad_norm": 0.46929338574409485, "learning_rate": 7.398064239497538e-07, "loss": 0.3042, "step": 5432 }, { "epoch": 2.52658502557743, "grad_norm": 0.31712985038757324, "learning_rate": 7.383905473956137e-07, "loss": 0.313, "step": 5433 }, { "epoch": 2.5270500697566267, "grad_norm": 0.31012576818466187, "learning_rate": 7.369759189976622e-07, "loss": 0.3, "step": 5434 }, { "epoch": 2.527515113935824, "grad_norm": 0.3287335932254791, "learning_rate": 7.355625391702176e-07, "loss": 0.3219, "step": 5435 }, { "epoch": 2.527980158115021, "grad_norm": 0.3296997547149658, "learning_rate": 7.341504083272388e-07, "loss": 0.3344, "step": 5436 }, { "epoch": 2.528445202294218, "grad_norm": 0.3109728693962097, "learning_rate": 7.327395268823128e-07, "loss": 0.323, "step": 5437 }, { "epoch": 2.528910246473415, "grad_norm": 0.2954341471195221, "learning_rate": 7.313298952486675e-07, "loss": 0.2939, "step": 5438 }, { "epoch": 2.529375290652612, "grad_norm": 0.3587803542613983, "learning_rate": 7.299215138391574e-07, "loss": 0.3491, "step": 5439 }, { "epoch": 2.529840334831809, "grad_norm": 0.29528895020484924, "learning_rate": 7.285143830662778e-07, "loss": 0.2828, "step": 5440 }, { "epoch": 2.530305379011006, "grad_norm": 0.33470654487609863, "learning_rate": 7.271085033421516e-07, "loss": 0.3367, "step": 5441 }, { "epoch": 2.5307704231902033, "grad_norm": 0.3154504895210266, "learning_rate": 7.2570387507854e-07, "loss": 0.2794, "step": 5442 }, { "epoch": 2.5312354673694, "grad_norm": 0.34760943055152893, "learning_rate": 7.243004986868357e-07, "loss": 0.3367, "step": 5443 }, { "epoch": 2.531700511548597, "grad_norm": 0.33287641406059265, "learning_rate": 7.228983745780643e-07, "loss": 0.3303, "step": 5444 }, { "epoch": 2.532165555727794, "grad_norm": 0.3180674612522125, "learning_rate": 7.214975031628856e-07, "loss": 0.2925, "step": 5445 }, { "epoch": 2.532630599906991, "grad_norm": 0.3350416421890259, "learning_rate": 7.200978848515911e-07, "loss": 0.34, "step": 5446 }, { "epoch": 2.5330956440861883, "grad_norm": 0.347086101770401, "learning_rate": 7.186995200541086e-07, "loss": 0.3296, "step": 5447 }, { "epoch": 2.533560688265385, "grad_norm": 0.3760274350643158, "learning_rate": 7.17302409179993e-07, "loss": 0.349, "step": 5448 }, { "epoch": 2.534025732444582, "grad_norm": 0.3314683437347412, "learning_rate": 7.159065526384384e-07, "loss": 0.2927, "step": 5449 }, { "epoch": 2.5344907766237794, "grad_norm": 0.309643417596817, "learning_rate": 7.145119508382664e-07, "loss": 0.2969, "step": 5450 }, { "epoch": 2.5349558208029763, "grad_norm": 0.4211340546607971, "learning_rate": 7.131186041879357e-07, "loss": 0.3603, "step": 5451 }, { "epoch": 2.5354208649821732, "grad_norm": 0.3383927643299103, "learning_rate": 7.117265130955314e-07, "loss": 0.3306, "step": 5452 }, { "epoch": 2.5358859091613706, "grad_norm": 0.30522429943084717, "learning_rate": 7.10335677968777e-07, "loss": 0.2957, "step": 5453 }, { "epoch": 2.5363509533405675, "grad_norm": 0.31220120191574097, "learning_rate": 7.089460992150243e-07, "loss": 0.3003, "step": 5454 }, { "epoch": 2.5368159975197644, "grad_norm": 0.36357051134109497, "learning_rate": 7.075577772412607e-07, "loss": 0.3585, "step": 5455 }, { "epoch": 2.5372810416989613, "grad_norm": 0.3370119333267212, "learning_rate": 7.061707124540995e-07, "loss": 0.3196, "step": 5456 }, { "epoch": 2.537746085878158, "grad_norm": 0.33187320828437805, "learning_rate": 7.047849052597927e-07, "loss": 0.3124, "step": 5457 }, { "epoch": 2.5382111300573555, "grad_norm": 0.3284560739994049, "learning_rate": 7.034003560642183e-07, "loss": 0.321, "step": 5458 }, { "epoch": 2.5386761742365525, "grad_norm": 0.31819069385528564, "learning_rate": 7.020170652728903e-07, "loss": 0.3049, "step": 5459 }, { "epoch": 2.5391412184157494, "grad_norm": 0.2943916618824005, "learning_rate": 7.006350332909495e-07, "loss": 0.2641, "step": 5460 }, { "epoch": 2.5396062625949467, "grad_norm": 0.3456489145755768, "learning_rate": 6.992542605231739e-07, "loss": 0.367, "step": 5461 }, { "epoch": 2.5400713067741436, "grad_norm": 0.30786919593811035, "learning_rate": 6.978747473739666e-07, "loss": 0.3087, "step": 5462 }, { "epoch": 2.5405363509533405, "grad_norm": 0.3164925277233124, "learning_rate": 6.964964942473662e-07, "loss": 0.3066, "step": 5463 }, { "epoch": 2.541001395132538, "grad_norm": 0.32444530725479126, "learning_rate": 6.951195015470396e-07, "loss": 0.3156, "step": 5464 }, { "epoch": 2.5414664393117348, "grad_norm": 0.3363860845565796, "learning_rate": 6.937437696762861e-07, "loss": 0.3014, "step": 5465 }, { "epoch": 2.5419314834909317, "grad_norm": 0.34026390314102173, "learning_rate": 6.923692990380349e-07, "loss": 0.3131, "step": 5466 }, { "epoch": 2.5423965276701286, "grad_norm": 0.31564757227897644, "learning_rate": 6.909960900348483e-07, "loss": 0.3066, "step": 5467 }, { "epoch": 2.5428615718493255, "grad_norm": 0.3490791618824005, "learning_rate": 6.896241430689133e-07, "loss": 0.3221, "step": 5468 }, { "epoch": 2.543326616028523, "grad_norm": 0.346028208732605, "learning_rate": 6.882534585420542e-07, "loss": 0.3227, "step": 5469 }, { "epoch": 2.5437916602077197, "grad_norm": 0.3382766842842102, "learning_rate": 6.868840368557194e-07, "loss": 0.3471, "step": 5470 }, { "epoch": 2.5442567043869166, "grad_norm": 0.30995672941207886, "learning_rate": 6.855158784109927e-07, "loss": 0.3273, "step": 5471 }, { "epoch": 2.544721748566114, "grad_norm": 0.3215526342391968, "learning_rate": 6.841489836085835e-07, "loss": 0.3241, "step": 5472 }, { "epoch": 2.545186792745311, "grad_norm": 0.3126165270805359, "learning_rate": 6.827833528488348e-07, "loss": 0.2946, "step": 5473 }, { "epoch": 2.545651836924508, "grad_norm": 0.3339928388595581, "learning_rate": 6.814189865317156e-07, "loss": 0.3383, "step": 5474 }, { "epoch": 2.5461168811037047, "grad_norm": 0.30406853556632996, "learning_rate": 6.800558850568295e-07, "loss": 0.3031, "step": 5475 }, { "epoch": 2.5465819252829016, "grad_norm": 0.3174978494644165, "learning_rate": 6.786940488234034e-07, "loss": 0.3134, "step": 5476 }, { "epoch": 2.547046969462099, "grad_norm": 0.32636481523513794, "learning_rate": 6.773334782302993e-07, "loss": 0.3233, "step": 5477 }, { "epoch": 2.547512013641296, "grad_norm": 0.31358960270881653, "learning_rate": 6.759741736760062e-07, "loss": 0.2805, "step": 5478 }, { "epoch": 2.5479770578204928, "grad_norm": 0.36361631751060486, "learning_rate": 6.746161355586411e-07, "loss": 0.3479, "step": 5479 }, { "epoch": 2.54844210199969, "grad_norm": 0.3181024193763733, "learning_rate": 6.732593642759533e-07, "loss": 0.2933, "step": 5480 }, { "epoch": 2.548907146178887, "grad_norm": 0.3153908848762512, "learning_rate": 6.719038602253164e-07, "loss": 0.3188, "step": 5481 }, { "epoch": 2.549372190358084, "grad_norm": 0.3370981216430664, "learning_rate": 6.705496238037379e-07, "loss": 0.3397, "step": 5482 }, { "epoch": 2.5498372345372813, "grad_norm": 0.32690051198005676, "learning_rate": 6.691966554078494e-07, "loss": 0.2883, "step": 5483 }, { "epoch": 2.550302278716478, "grad_norm": 0.35293567180633545, "learning_rate": 6.678449554339161e-07, "loss": 0.3367, "step": 5484 }, { "epoch": 2.550767322895675, "grad_norm": 0.2994639575481415, "learning_rate": 6.664945242778264e-07, "loss": 0.3071, "step": 5485 }, { "epoch": 2.551232367074872, "grad_norm": 0.30928120017051697, "learning_rate": 6.651453623351017e-07, "loss": 0.3188, "step": 5486 }, { "epoch": 2.551697411254069, "grad_norm": 0.3208847939968109, "learning_rate": 6.637974700008876e-07, "loss": 0.3407, "step": 5487 }, { "epoch": 2.5521624554332663, "grad_norm": 0.3318404257297516, "learning_rate": 6.624508476699609e-07, "loss": 0.3058, "step": 5488 }, { "epoch": 2.552627499612463, "grad_norm": 0.34476324915885925, "learning_rate": 6.611054957367253e-07, "loss": 0.2937, "step": 5489 }, { "epoch": 2.55309254379166, "grad_norm": 0.3424232304096222, "learning_rate": 6.597614145952136e-07, "loss": 0.3149, "step": 5490 }, { "epoch": 2.5535575879708574, "grad_norm": 0.336582750082016, "learning_rate": 6.584186046390839e-07, "loss": 0.3275, "step": 5491 }, { "epoch": 2.5540226321500543, "grad_norm": 0.30492153763771057, "learning_rate": 6.570770662616244e-07, "loss": 0.3226, "step": 5492 }, { "epoch": 2.5544876763292512, "grad_norm": 0.344941645860672, "learning_rate": 6.557367998557485e-07, "loss": 0.3153, "step": 5493 }, { "epoch": 2.5549527205084486, "grad_norm": 0.3531348407268524, "learning_rate": 6.543978058140005e-07, "loss": 0.3301, "step": 5494 }, { "epoch": 2.5554177646876455, "grad_norm": 0.3221784234046936, "learning_rate": 6.530600845285478e-07, "loss": 0.3318, "step": 5495 }, { "epoch": 2.5558828088668424, "grad_norm": 0.30838826298713684, "learning_rate": 6.517236363911894e-07, "loss": 0.2999, "step": 5496 }, { "epoch": 2.5563478530460393, "grad_norm": 0.3686372935771942, "learning_rate": 6.503884617933471e-07, "loss": 0.3305, "step": 5497 }, { "epoch": 2.556812897225236, "grad_norm": 0.31591683626174927, "learning_rate": 6.490545611260741e-07, "loss": 0.3122, "step": 5498 }, { "epoch": 2.5572779414044335, "grad_norm": 0.34594687819480896, "learning_rate": 6.477219347800462e-07, "loss": 0.3461, "step": 5499 }, { "epoch": 2.5577429855836304, "grad_norm": 0.31076663732528687, "learning_rate": 6.463905831455685e-07, "loss": 0.2906, "step": 5500 }, { "epoch": 2.5582080297628274, "grad_norm": 0.3259563446044922, "learning_rate": 6.450605066125726e-07, "loss": 0.3008, "step": 5501 }, { "epoch": 2.5586730739420247, "grad_norm": 0.31875672936439514, "learning_rate": 6.437317055706172e-07, "loss": 0.3028, "step": 5502 }, { "epoch": 2.5591381181212216, "grad_norm": 0.3503369390964508, "learning_rate": 6.424041804088848e-07, "loss": 0.3006, "step": 5503 }, { "epoch": 2.5596031623004185, "grad_norm": 0.3309253454208374, "learning_rate": 6.410779315161885e-07, "loss": 0.3309, "step": 5504 }, { "epoch": 2.5600682064796154, "grad_norm": 0.30641940236091614, "learning_rate": 6.397529592809615e-07, "loss": 0.3159, "step": 5505 }, { "epoch": 2.5605332506588123, "grad_norm": 0.4112693965435028, "learning_rate": 6.384292640912704e-07, "loss": 0.328, "step": 5506 }, { "epoch": 2.5609982948380097, "grad_norm": 0.3142835199832916, "learning_rate": 6.371068463348006e-07, "loss": 0.3059, "step": 5507 }, { "epoch": 2.5614633390172066, "grad_norm": 0.32879573106765747, "learning_rate": 6.357857063988692e-07, "loss": 0.3145, "step": 5508 }, { "epoch": 2.5619283831964035, "grad_norm": 0.3391149640083313, "learning_rate": 6.344658446704155e-07, "loss": 0.3231, "step": 5509 }, { "epoch": 2.562393427375601, "grad_norm": 0.30871260166168213, "learning_rate": 6.331472615360062e-07, "loss": 0.3248, "step": 5510 }, { "epoch": 2.5628584715547977, "grad_norm": 0.32863956689834595, "learning_rate": 6.318299573818315e-07, "loss": 0.2954, "step": 5511 }, { "epoch": 2.5633235157339946, "grad_norm": 0.3561699688434601, "learning_rate": 6.305139325937098e-07, "loss": 0.3346, "step": 5512 }, { "epoch": 2.563788559913192, "grad_norm": 0.32806986570358276, "learning_rate": 6.291991875570841e-07, "loss": 0.2992, "step": 5513 }, { "epoch": 2.564253604092389, "grad_norm": 0.4038988947868347, "learning_rate": 6.278857226570196e-07, "loss": 0.3525, "step": 5514 }, { "epoch": 2.564718648271586, "grad_norm": 0.35461750626564026, "learning_rate": 6.265735382782106e-07, "loss": 0.3138, "step": 5515 }, { "epoch": 2.5651836924507827, "grad_norm": 0.3094637393951416, "learning_rate": 6.252626348049734e-07, "loss": 0.305, "step": 5516 }, { "epoch": 2.5656487366299796, "grad_norm": 0.3154189884662628, "learning_rate": 6.239530126212518e-07, "loss": 0.3293, "step": 5517 }, { "epoch": 2.566113780809177, "grad_norm": 0.3151610493659973, "learning_rate": 6.226446721106111e-07, "loss": 0.3055, "step": 5518 }, { "epoch": 2.566578824988374, "grad_norm": 0.3299187421798706, "learning_rate": 6.213376136562449e-07, "loss": 0.3182, "step": 5519 }, { "epoch": 2.5670438691675708, "grad_norm": 0.34355783462524414, "learning_rate": 6.20031837640967e-07, "loss": 0.3599, "step": 5520 }, { "epoch": 2.567508913346768, "grad_norm": 0.3010472059249878, "learning_rate": 6.187273444472202e-07, "loss": 0.2715, "step": 5521 }, { "epoch": 2.567973957525965, "grad_norm": 0.3234943151473999, "learning_rate": 6.174241344570681e-07, "loss": 0.3174, "step": 5522 }, { "epoch": 2.568439001705162, "grad_norm": 0.3738628327846527, "learning_rate": 6.161222080522e-07, "loss": 0.3164, "step": 5523 }, { "epoch": 2.5689040458843593, "grad_norm": 0.31000205874443054, "learning_rate": 6.14821565613929e-07, "loss": 0.3146, "step": 5524 }, { "epoch": 2.569369090063556, "grad_norm": 0.34590592980384827, "learning_rate": 6.135222075231933e-07, "loss": 0.3303, "step": 5525 }, { "epoch": 2.569834134242753, "grad_norm": 0.3145350217819214, "learning_rate": 6.122241341605523e-07, "loss": 0.3044, "step": 5526 }, { "epoch": 2.57029917842195, "grad_norm": 0.3343406021595001, "learning_rate": 6.109273459061916e-07, "loss": 0.3601, "step": 5527 }, { "epoch": 2.570764222601147, "grad_norm": 0.30140528082847595, "learning_rate": 6.096318431399178e-07, "loss": 0.3012, "step": 5528 }, { "epoch": 2.5712292667803442, "grad_norm": 0.34568503499031067, "learning_rate": 6.083376262411644e-07, "loss": 0.3658, "step": 5529 }, { "epoch": 2.571694310959541, "grad_norm": 0.33937206864356995, "learning_rate": 6.070446955889853e-07, "loss": 0.3469, "step": 5530 }, { "epoch": 2.572159355138738, "grad_norm": 0.3581177592277527, "learning_rate": 6.057530515620608e-07, "loss": 0.3144, "step": 5531 }, { "epoch": 2.5726243993179354, "grad_norm": 0.29124531149864197, "learning_rate": 6.044626945386894e-07, "loss": 0.3194, "step": 5532 }, { "epoch": 2.5730894434971323, "grad_norm": 0.30630677938461304, "learning_rate": 6.031736248967984e-07, "loss": 0.3128, "step": 5533 }, { "epoch": 2.573554487676329, "grad_norm": 0.3839922547340393, "learning_rate": 6.018858430139335e-07, "loss": 0.3398, "step": 5534 }, { "epoch": 2.574019531855526, "grad_norm": 0.336647093296051, "learning_rate": 6.005993492672657e-07, "loss": 0.2957, "step": 5535 }, { "epoch": 2.574484576034723, "grad_norm": 0.3169485330581665, "learning_rate": 5.993141440335887e-07, "loss": 0.3208, "step": 5536 }, { "epoch": 2.5749496202139204, "grad_norm": 0.3146721124649048, "learning_rate": 5.980302276893191e-07, "loss": 0.3066, "step": 5537 }, { "epoch": 2.5754146643931173, "grad_norm": 0.32376930117607117, "learning_rate": 5.967476006104922e-07, "loss": 0.3335, "step": 5538 }, { "epoch": 2.575879708572314, "grad_norm": 0.33218416571617126, "learning_rate": 5.95466263172772e-07, "loss": 0.3186, "step": 5539 }, { "epoch": 2.5763447527515115, "grad_norm": 0.31140658259391785, "learning_rate": 5.941862157514383e-07, "loss": 0.3257, "step": 5540 }, { "epoch": 2.5768097969307084, "grad_norm": 0.324327677488327, "learning_rate": 5.92907458721399e-07, "loss": 0.3425, "step": 5541 }, { "epoch": 2.5772748411099053, "grad_norm": 0.3449290692806244, "learning_rate": 5.916299924571789e-07, "loss": 0.3705, "step": 5542 }, { "epoch": 2.5777398852891027, "grad_norm": 0.32734236121177673, "learning_rate": 5.903538173329287e-07, "loss": 0.2884, "step": 5543 }, { "epoch": 2.5782049294682996, "grad_norm": 0.33377382159233093, "learning_rate": 5.890789337224184e-07, "loss": 0.3193, "step": 5544 }, { "epoch": 2.5786699736474965, "grad_norm": 0.31567618250846863, "learning_rate": 5.87805341999042e-07, "loss": 0.3011, "step": 5545 }, { "epoch": 2.5791350178266934, "grad_norm": 0.3374476134777069, "learning_rate": 5.865330425358118e-07, "loss": 0.3549, "step": 5546 }, { "epoch": 2.5796000620058903, "grad_norm": 0.3086042106151581, "learning_rate": 5.852620357053651e-07, "loss": 0.3016, "step": 5547 }, { "epoch": 2.5800651061850877, "grad_norm": 0.31613045930862427, "learning_rate": 5.839923218799587e-07, "loss": 0.3169, "step": 5548 }, { "epoch": 2.5805301503642846, "grad_norm": 0.2952466309070587, "learning_rate": 5.827239014314723e-07, "loss": 0.3013, "step": 5549 }, { "epoch": 2.5809951945434815, "grad_norm": 0.32223108410835266, "learning_rate": 5.814567747314049e-07, "loss": 0.3347, "step": 5550 }, { "epoch": 2.581460238722679, "grad_norm": 0.3356245458126068, "learning_rate": 5.801909421508756e-07, "loss": 0.3239, "step": 5551 }, { "epoch": 2.5819252829018757, "grad_norm": 0.35723865032196045, "learning_rate": 5.789264040606291e-07, "loss": 0.3591, "step": 5552 }, { "epoch": 2.5823903270810726, "grad_norm": 0.3004617989063263, "learning_rate": 5.776631608310257e-07, "loss": 0.3046, "step": 5553 }, { "epoch": 2.58285537126027, "grad_norm": 0.35467642545700073, "learning_rate": 5.764012128320507e-07, "loss": 0.3802, "step": 5554 }, { "epoch": 2.583320415439467, "grad_norm": 0.3245300054550171, "learning_rate": 5.75140560433306e-07, "loss": 0.2898, "step": 5555 }, { "epoch": 2.583785459618664, "grad_norm": 0.342220276594162, "learning_rate": 5.738812040040187e-07, "loss": 0.3478, "step": 5556 }, { "epoch": 2.5842505037978607, "grad_norm": 0.3202744126319885, "learning_rate": 5.726231439130314e-07, "loss": 0.3162, "step": 5557 }, { "epoch": 2.5847155479770576, "grad_norm": 0.33512887358665466, "learning_rate": 5.713663805288106e-07, "loss": 0.308, "step": 5558 }, { "epoch": 2.585180592156255, "grad_norm": 0.33824869990348816, "learning_rate": 5.701109142194422e-07, "loss": 0.308, "step": 5559 }, { "epoch": 2.585645636335452, "grad_norm": 0.3539738953113556, "learning_rate": 5.688567453526328e-07, "loss": 0.3317, "step": 5560 }, { "epoch": 2.5861106805146488, "grad_norm": 0.32088860869407654, "learning_rate": 5.676038742957057e-07, "loss": 0.3272, "step": 5561 }, { "epoch": 2.586575724693846, "grad_norm": 0.33904799818992615, "learning_rate": 5.663523014156086e-07, "loss": 0.2933, "step": 5562 }, { "epoch": 2.587040768873043, "grad_norm": 0.3523750901222229, "learning_rate": 5.651020270789049e-07, "loss": 0.2996, "step": 5563 }, { "epoch": 2.58750581305224, "grad_norm": 0.32999008893966675, "learning_rate": 5.638530516517821e-07, "loss": 0.3068, "step": 5564 }, { "epoch": 2.587970857231437, "grad_norm": 0.3218596279621124, "learning_rate": 5.626053755000421e-07, "loss": 0.32, "step": 5565 }, { "epoch": 2.588435901410634, "grad_norm": 0.32146209478378296, "learning_rate": 5.613589989891116e-07, "loss": 0.3225, "step": 5566 }, { "epoch": 2.588900945589831, "grad_norm": 0.2964208424091339, "learning_rate": 5.601139224840318e-07, "loss": 0.2975, "step": 5567 }, { "epoch": 2.589365989769028, "grad_norm": 0.332450270652771, "learning_rate": 5.588701463494672e-07, "loss": 0.3431, "step": 5568 }, { "epoch": 2.589831033948225, "grad_norm": 0.28528085350990295, "learning_rate": 5.576276709496975e-07, "loss": 0.2784, "step": 5569 }, { "epoch": 2.5902960781274222, "grad_norm": 0.3144235908985138, "learning_rate": 5.563864966486254e-07, "loss": 0.3402, "step": 5570 }, { "epoch": 2.590761122306619, "grad_norm": 0.35045549273490906, "learning_rate": 5.551466238097697e-07, "loss": 0.3278, "step": 5571 }, { "epoch": 2.591226166485816, "grad_norm": 0.3082601726055145, "learning_rate": 5.539080527962704e-07, "loss": 0.3108, "step": 5572 }, { "epoch": 2.5916912106650134, "grad_norm": 0.30737003684043884, "learning_rate": 5.526707839708834e-07, "loss": 0.3004, "step": 5573 }, { "epoch": 2.5921562548442103, "grad_norm": 0.34509941935539246, "learning_rate": 5.514348176959855e-07, "loss": 0.3588, "step": 5574 }, { "epoch": 2.592621299023407, "grad_norm": 0.33185267448425293, "learning_rate": 5.5020015433357e-07, "loss": 0.3188, "step": 5575 }, { "epoch": 2.593086343202604, "grad_norm": 0.32632753252983093, "learning_rate": 5.489667942452515e-07, "loss": 0.2991, "step": 5576 }, { "epoch": 2.593551387381801, "grad_norm": 0.31657204031944275, "learning_rate": 5.477347377922593e-07, "loss": 0.3415, "step": 5577 }, { "epoch": 2.5940164315609984, "grad_norm": 0.3294719457626343, "learning_rate": 5.465039853354442e-07, "loss": 0.3144, "step": 5578 }, { "epoch": 2.5944814757401953, "grad_norm": 0.3257904052734375, "learning_rate": 5.452745372352725e-07, "loss": 0.2761, "step": 5579 }, { "epoch": 2.594946519919392, "grad_norm": 0.3162056505680084, "learning_rate": 5.440463938518304e-07, "loss": 0.333, "step": 5580 }, { "epoch": 2.5954115640985895, "grad_norm": 0.3013499081134796, "learning_rate": 5.428195555448202e-07, "loss": 0.3004, "step": 5581 }, { "epoch": 2.5958766082777864, "grad_norm": 0.3342210352420807, "learning_rate": 5.415940226735633e-07, "loss": 0.3388, "step": 5582 }, { "epoch": 2.5963416524569833, "grad_norm": 0.31008031964302063, "learning_rate": 5.403697955969988e-07, "loss": 0.2927, "step": 5583 }, { "epoch": 2.5968066966361807, "grad_norm": 0.32369595766067505, "learning_rate": 5.391468746736834e-07, "loss": 0.335, "step": 5584 }, { "epoch": 2.5972717408153776, "grad_norm": 0.3301704525947571, "learning_rate": 5.379252602617902e-07, "loss": 0.3322, "step": 5585 }, { "epoch": 2.5977367849945745, "grad_norm": 0.2886703908443451, "learning_rate": 5.367049527191093e-07, "loss": 0.2747, "step": 5586 }, { "epoch": 2.5982018291737714, "grad_norm": 0.3267196714878082, "learning_rate": 5.354859524030503e-07, "loss": 0.3076, "step": 5587 }, { "epoch": 2.5986668733529683, "grad_norm": 0.34606629610061646, "learning_rate": 5.342682596706372e-07, "loss": 0.3378, "step": 5588 }, { "epoch": 2.5991319175321657, "grad_norm": 0.3182050287723541, "learning_rate": 5.330518748785147e-07, "loss": 0.3144, "step": 5589 }, { "epoch": 2.5995969617113626, "grad_norm": 0.3103780746459961, "learning_rate": 5.318367983829393e-07, "loss": 0.2717, "step": 5590 }, { "epoch": 2.6000620058905595, "grad_norm": 0.36062633991241455, "learning_rate": 5.306230305397897e-07, "loss": 0.3159, "step": 5591 }, { "epoch": 2.600527050069757, "grad_norm": 0.3258778750896454, "learning_rate": 5.294105717045567e-07, "loss": 0.3356, "step": 5592 }, { "epoch": 2.6009920942489537, "grad_norm": 0.33117127418518066, "learning_rate": 5.281994222323506e-07, "loss": 0.329, "step": 5593 }, { "epoch": 2.6014571384281506, "grad_norm": 0.32020604610443115, "learning_rate": 5.269895824778976e-07, "loss": 0.3093, "step": 5594 }, { "epoch": 2.6019221826073475, "grad_norm": 0.33276131749153137, "learning_rate": 5.25781052795541e-07, "loss": 0.3143, "step": 5595 }, { "epoch": 2.602387226786545, "grad_norm": 0.3160843551158905, "learning_rate": 5.245738335392376e-07, "loss": 0.2935, "step": 5596 }, { "epoch": 2.602852270965742, "grad_norm": 0.3074491620063782, "learning_rate": 5.233679250625646e-07, "loss": 0.3083, "step": 5597 }, { "epoch": 2.6033173151449387, "grad_norm": 0.3221345841884613, "learning_rate": 5.221633277187104e-07, "loss": 0.3324, "step": 5598 }, { "epoch": 2.6037823593241356, "grad_norm": 0.36884576082229614, "learning_rate": 5.209600418604843e-07, "loss": 0.3293, "step": 5599 }, { "epoch": 2.604247403503333, "grad_norm": 0.3398898243904114, "learning_rate": 5.197580678403074e-07, "loss": 0.3435, "step": 5600 }, { "epoch": 2.60471244768253, "grad_norm": 0.2826615273952484, "learning_rate": 5.185574060102206e-07, "loss": 0.2942, "step": 5601 }, { "epoch": 2.6051774918617268, "grad_norm": 0.35747769474983215, "learning_rate": 5.17358056721875e-07, "loss": 0.3186, "step": 5602 }, { "epoch": 2.605642536040924, "grad_norm": 0.32651564478874207, "learning_rate": 5.161600203265438e-07, "loss": 0.3283, "step": 5603 }, { "epoch": 2.606107580220121, "grad_norm": 0.305650532245636, "learning_rate": 5.149632971751096e-07, "loss": 0.3121, "step": 5604 }, { "epoch": 2.606572624399318, "grad_norm": 0.30761486291885376, "learning_rate": 5.137678876180746e-07, "loss": 0.3106, "step": 5605 }, { "epoch": 2.607037668578515, "grad_norm": 0.3115074038505554, "learning_rate": 5.125737920055551e-07, "loss": 0.3197, "step": 5606 }, { "epoch": 2.6075027127577117, "grad_norm": 0.3347545564174652, "learning_rate": 5.113810106872825e-07, "loss": 0.3098, "step": 5607 }, { "epoch": 2.607967756936909, "grad_norm": 0.3298303782939911, "learning_rate": 5.10189544012602e-07, "loss": 0.316, "step": 5608 }, { "epoch": 2.608432801116106, "grad_norm": 0.33597150444984436, "learning_rate": 5.089993923304759e-07, "loss": 0.3258, "step": 5609 }, { "epoch": 2.608897845295303, "grad_norm": 0.312759667634964, "learning_rate": 5.078105559894791e-07, "loss": 0.3181, "step": 5610 }, { "epoch": 2.6093628894745002, "grad_norm": 0.32988691329956055, "learning_rate": 5.066230353378038e-07, "loss": 0.3211, "step": 5611 }, { "epoch": 2.609827933653697, "grad_norm": 0.333040714263916, "learning_rate": 5.054368307232537e-07, "loss": 0.3087, "step": 5612 }, { "epoch": 2.610292977832894, "grad_norm": 0.3228285014629364, "learning_rate": 5.042519424932512e-07, "loss": 0.3095, "step": 5613 }, { "epoch": 2.6107580220120914, "grad_norm": 0.35871362686157227, "learning_rate": 5.030683709948292e-07, "loss": 0.3171, "step": 5614 }, { "epoch": 2.6112230661912883, "grad_norm": 0.3096243739128113, "learning_rate": 5.018861165746369e-07, "loss": 0.3054, "step": 5615 }, { "epoch": 2.611688110370485, "grad_norm": 0.2971365451812744, "learning_rate": 5.007051795789375e-07, "loss": 0.2977, "step": 5616 }, { "epoch": 2.612153154549682, "grad_norm": 0.31067079305648804, "learning_rate": 4.995255603536076e-07, "loss": 0.3265, "step": 5617 }, { "epoch": 2.612618198728879, "grad_norm": 0.3296055793762207, "learning_rate": 4.983472592441391e-07, "loss": 0.3023, "step": 5618 }, { "epoch": 2.6130832429080764, "grad_norm": 0.33535173535346985, "learning_rate": 4.971702765956388e-07, "loss": 0.3282, "step": 5619 }, { "epoch": 2.6135482870872733, "grad_norm": 0.30592218041419983, "learning_rate": 4.959946127528231e-07, "loss": 0.3186, "step": 5620 }, { "epoch": 2.61401333126647, "grad_norm": 0.32471343874931335, "learning_rate": 4.948202680600267e-07, "loss": 0.2948, "step": 5621 }, { "epoch": 2.6144783754456675, "grad_norm": 0.331249862909317, "learning_rate": 4.936472428611961e-07, "loss": 0.3168, "step": 5622 }, { "epoch": 2.6149434196248644, "grad_norm": 0.33011171221733093, "learning_rate": 4.924755374998891e-07, "loss": 0.3389, "step": 5623 }, { "epoch": 2.6154084638040613, "grad_norm": 0.3165659010410309, "learning_rate": 4.913051523192819e-07, "loss": 0.317, "step": 5624 }, { "epoch": 2.6158735079832587, "grad_norm": 0.3213482201099396, "learning_rate": 4.901360876621597e-07, "loss": 0.3103, "step": 5625 }, { "epoch": 2.6163385521624556, "grad_norm": 0.3293680250644684, "learning_rate": 4.88968343870923e-07, "loss": 0.3189, "step": 5626 }, { "epoch": 2.6168035963416525, "grad_norm": 0.3114880323410034, "learning_rate": 4.87801921287585e-07, "loss": 0.317, "step": 5627 }, { "epoch": 2.6172686405208494, "grad_norm": 0.32358518242836, "learning_rate": 4.866368202537714e-07, "loss": 0.3315, "step": 5628 }, { "epoch": 2.6177336847000463, "grad_norm": 0.3022099733352661, "learning_rate": 4.854730411107217e-07, "loss": 0.3197, "step": 5629 }, { "epoch": 2.6181987288792437, "grad_norm": 0.34834814071655273, "learning_rate": 4.843105841992895e-07, "loss": 0.3536, "step": 5630 }, { "epoch": 2.6186637730584406, "grad_norm": 0.3669452369213104, "learning_rate": 4.831494498599371e-07, "loss": 0.3462, "step": 5631 }, { "epoch": 2.6191288172376375, "grad_norm": 0.3363710343837738, "learning_rate": 4.819896384327433e-07, "loss": 0.3383, "step": 5632 }, { "epoch": 2.619593861416835, "grad_norm": 0.30958956480026245, "learning_rate": 4.808311502573976e-07, "loss": 0.2891, "step": 5633 }, { "epoch": 2.6200589055960317, "grad_norm": 0.3210178315639496, "learning_rate": 4.796739856732024e-07, "loss": 0.3327, "step": 5634 }, { "epoch": 2.6205239497752286, "grad_norm": 0.31170886754989624, "learning_rate": 4.785181450190723e-07, "loss": 0.3178, "step": 5635 }, { "epoch": 2.6209889939544255, "grad_norm": 0.3083958029747009, "learning_rate": 4.773636286335348e-07, "loss": 0.3016, "step": 5636 }, { "epoch": 2.6214540381336224, "grad_norm": 0.35575956106185913, "learning_rate": 4.7621043685472824e-07, "loss": 0.3492, "step": 5637 }, { "epoch": 2.62191908231282, "grad_norm": 0.3041512370109558, "learning_rate": 4.750585700204047e-07, "loss": 0.2905, "step": 5638 }, { "epoch": 2.6223841264920167, "grad_norm": 0.3430851399898529, "learning_rate": 4.739080284679254e-07, "loss": 0.3088, "step": 5639 }, { "epoch": 2.6228491706712136, "grad_norm": 0.3387916088104248, "learning_rate": 4.727588125342669e-07, "loss": 0.319, "step": 5640 }, { "epoch": 2.623314214850411, "grad_norm": 0.3547907769680023, "learning_rate": 4.716109225560156e-07, "loss": 0.2953, "step": 5641 }, { "epoch": 2.623779259029608, "grad_norm": 0.32964009046554565, "learning_rate": 4.7046435886937024e-07, "loss": 0.3286, "step": 5642 }, { "epoch": 2.6242443032088048, "grad_norm": 0.38167962431907654, "learning_rate": 4.6931912181014007e-07, "loss": 0.3104, "step": 5643 }, { "epoch": 2.624709347388002, "grad_norm": 0.33540862798690796, "learning_rate": 4.681752117137467e-07, "loss": 0.3494, "step": 5644 }, { "epoch": 2.625174391567199, "grad_norm": 0.3289260268211365, "learning_rate": 4.6703262891522214e-07, "loss": 0.3212, "step": 5645 }, { "epoch": 2.625639435746396, "grad_norm": 0.3132651150226593, "learning_rate": 4.6589137374921155e-07, "loss": 0.2817, "step": 5646 }, { "epoch": 2.626104479925593, "grad_norm": 0.33676326274871826, "learning_rate": 4.647514465499686e-07, "loss": 0.3141, "step": 5647 }, { "epoch": 2.6265695241047897, "grad_norm": 0.3420335054397583, "learning_rate": 4.6361284765136125e-07, "loss": 0.3312, "step": 5648 }, { "epoch": 2.627034568283987, "grad_norm": 0.33695536851882935, "learning_rate": 4.6247557738686445e-07, "loss": 0.3208, "step": 5649 }, { "epoch": 2.627499612463184, "grad_norm": 0.351370245218277, "learning_rate": 4.613396360895683e-07, "loss": 0.3156, "step": 5650 }, { "epoch": 2.627964656642381, "grad_norm": 0.3574759364128113, "learning_rate": 4.602050240921696e-07, "loss": 0.305, "step": 5651 }, { "epoch": 2.6284297008215782, "grad_norm": 0.3272240161895752, "learning_rate": 4.590717417269791e-07, "loss": 0.338, "step": 5652 }, { "epoch": 2.628894745000775, "grad_norm": 0.3331619203090668, "learning_rate": 4.5793978932591574e-07, "loss": 0.3186, "step": 5653 }, { "epoch": 2.629359789179972, "grad_norm": 0.3595447242259979, "learning_rate": 4.568091672205122e-07, "loss": 0.3238, "step": 5654 }, { "epoch": 2.6298248333591694, "grad_norm": 0.327280730009079, "learning_rate": 4.5567987574190677e-07, "loss": 0.3365, "step": 5655 }, { "epoch": 2.6302898775383663, "grad_norm": 0.28672948479652405, "learning_rate": 4.5455191522085274e-07, "loss": 0.2672, "step": 5656 }, { "epoch": 2.630754921717563, "grad_norm": 0.3301229774951935, "learning_rate": 4.534252859877097e-07, "loss": 0.344, "step": 5657 }, { "epoch": 2.63121996589676, "grad_norm": 0.3244812488555908, "learning_rate": 4.522999883724494e-07, "loss": 0.3144, "step": 5658 }, { "epoch": 2.631685010075957, "grad_norm": 0.3279408812522888, "learning_rate": 4.511760227046541e-07, "loss": 0.3403, "step": 5659 }, { "epoch": 2.6321500542551544, "grad_norm": 0.32007279992103577, "learning_rate": 4.500533893135134e-07, "loss": 0.3198, "step": 5660 }, { "epoch": 2.6326150984343513, "grad_norm": 0.3092459440231323, "learning_rate": 4.489320885278309e-07, "loss": 0.3085, "step": 5661 }, { "epoch": 2.633080142613548, "grad_norm": 0.32061436772346497, "learning_rate": 4.4781212067601445e-07, "loss": 0.3115, "step": 5662 }, { "epoch": 2.6335451867927455, "grad_norm": 0.3219727873802185, "learning_rate": 4.4669348608608664e-07, "loss": 0.3278, "step": 5663 }, { "epoch": 2.6340102309719424, "grad_norm": 0.33633241057395935, "learning_rate": 4.4557618508567603e-07, "loss": 0.3318, "step": 5664 }, { "epoch": 2.6344752751511393, "grad_norm": 0.31990426778793335, "learning_rate": 4.4446021800202356e-07, "loss": 0.3395, "step": 5665 }, { "epoch": 2.6349403193303362, "grad_norm": 0.33744657039642334, "learning_rate": 4.4334558516197666e-07, "loss": 0.3202, "step": 5666 }, { "epoch": 2.635405363509533, "grad_norm": 0.301940381526947, "learning_rate": 4.422322868919937e-07, "loss": 0.2888, "step": 5667 }, { "epoch": 2.6358704076887305, "grad_norm": 0.34101638197898865, "learning_rate": 4.411203235181405e-07, "loss": 0.329, "step": 5668 }, { "epoch": 2.6363354518679274, "grad_norm": 0.3637217581272125, "learning_rate": 4.400096953660948e-07, "loss": 0.3665, "step": 5669 }, { "epoch": 2.6368004960471243, "grad_norm": 0.30867865681648254, "learning_rate": 4.3890040276114044e-07, "loss": 0.2851, "step": 5670 }, { "epoch": 2.6372655402263216, "grad_norm": 0.3138599693775177, "learning_rate": 4.377924460281718e-07, "loss": 0.3383, "step": 5671 }, { "epoch": 2.6377305844055186, "grad_norm": 0.2975723147392273, "learning_rate": 4.3668582549169005e-07, "loss": 0.2762, "step": 5672 }, { "epoch": 2.6381956285847155, "grad_norm": 0.3239637315273285, "learning_rate": 4.355805414758085e-07, "loss": 0.3482, "step": 5673 }, { "epoch": 2.638660672763913, "grad_norm": 0.3464727997779846, "learning_rate": 4.3447659430424507e-07, "loss": 0.3351, "step": 5674 }, { "epoch": 2.6391257169431097, "grad_norm": 0.309800386428833, "learning_rate": 4.3337398430032815e-07, "loss": 0.301, "step": 5675 }, { "epoch": 2.6395907611223066, "grad_norm": 0.30942457914352417, "learning_rate": 4.322727117869951e-07, "loss": 0.3041, "step": 5676 }, { "epoch": 2.6400558053015035, "grad_norm": 0.3338727355003357, "learning_rate": 4.3117277708679126e-07, "loss": 0.3128, "step": 5677 }, { "epoch": 2.6405208494807004, "grad_norm": 0.33797094225883484, "learning_rate": 4.3007418052186834e-07, "loss": 0.3582, "step": 5678 }, { "epoch": 2.6409858936598978, "grad_norm": 0.336549311876297, "learning_rate": 4.289769224139884e-07, "loss": 0.3085, "step": 5679 }, { "epoch": 2.6414509378390947, "grad_norm": 0.3069862425327301, "learning_rate": 4.278810030845193e-07, "loss": 0.3163, "step": 5680 }, { "epoch": 2.6419159820182916, "grad_norm": 0.3145103454589844, "learning_rate": 4.2678642285443937e-07, "loss": 0.3241, "step": 5681 }, { "epoch": 2.642381026197489, "grad_norm": 0.30022549629211426, "learning_rate": 4.2569318204433217e-07, "loss": 0.2823, "step": 5682 }, { "epoch": 2.642846070376686, "grad_norm": 0.37692004442214966, "learning_rate": 4.2460128097439157e-07, "loss": 0.3223, "step": 5683 }, { "epoch": 2.6433111145558827, "grad_norm": 0.33564627170562744, "learning_rate": 4.235107199644162e-07, "loss": 0.318, "step": 5684 }, { "epoch": 2.64377615873508, "grad_norm": 0.3265214264392853, "learning_rate": 4.224214993338149e-07, "loss": 0.3559, "step": 5685 }, { "epoch": 2.644241202914277, "grad_norm": 0.31681397557258606, "learning_rate": 4.2133361940160153e-07, "loss": 0.3053, "step": 5686 }, { "epoch": 2.644706247093474, "grad_norm": 0.317182332277298, "learning_rate": 4.202470804863984e-07, "loss": 0.3074, "step": 5687 }, { "epoch": 2.645171291272671, "grad_norm": 0.3192555606365204, "learning_rate": 4.1916188290643643e-07, "loss": 0.3069, "step": 5688 }, { "epoch": 2.6456363354518677, "grad_norm": 0.3164333403110504, "learning_rate": 4.1807802697955256e-07, "loss": 0.2939, "step": 5689 }, { "epoch": 2.646101379631065, "grad_norm": 0.32663610577583313, "learning_rate": 4.169955130231884e-07, "loss": 0.3759, "step": 5690 }, { "epoch": 2.646566423810262, "grad_norm": 0.3227081000804901, "learning_rate": 4.15914341354397e-07, "loss": 0.3027, "step": 5691 }, { "epoch": 2.647031467989459, "grad_norm": 0.3126518726348877, "learning_rate": 4.1483451228983453e-07, "loss": 0.3067, "step": 5692 }, { "epoch": 2.6474965121686562, "grad_norm": 0.3389665484428406, "learning_rate": 4.137560261457663e-07, "loss": 0.3341, "step": 5693 }, { "epoch": 2.647961556347853, "grad_norm": 0.3213329613208771, "learning_rate": 4.1267888323806294e-07, "loss": 0.3122, "step": 5694 }, { "epoch": 2.64842660052705, "grad_norm": 0.341896116733551, "learning_rate": 4.1160308388220103e-07, "loss": 0.3159, "step": 5695 }, { "epoch": 2.648891644706247, "grad_norm": 0.33184269070625305, "learning_rate": 4.1052862839326745e-07, "loss": 0.3124, "step": 5696 }, { "epoch": 2.649356688885444, "grad_norm": 0.32483184337615967, "learning_rate": 4.0945551708594934e-07, "loss": 0.3103, "step": 5697 }, { "epoch": 2.649821733064641, "grad_norm": 0.33331936597824097, "learning_rate": 4.083837502745458e-07, "loss": 0.3271, "step": 5698 }, { "epoch": 2.650286777243838, "grad_norm": 0.3043390214443207, "learning_rate": 4.0731332827295966e-07, "loss": 0.2737, "step": 5699 }, { "epoch": 2.650751821423035, "grad_norm": 0.3190237581729889, "learning_rate": 4.062442513947007e-07, "loss": 0.3411, "step": 5700 }, { "epoch": 2.6512168656022324, "grad_norm": 0.3191225826740265, "learning_rate": 4.051765199528823e-07, "loss": 0.3043, "step": 5701 }, { "epoch": 2.6516819097814293, "grad_norm": 0.33000683784484863, "learning_rate": 4.0411013426022773e-07, "loss": 0.3551, "step": 5702 }, { "epoch": 2.652146953960626, "grad_norm": 0.32607778906822205, "learning_rate": 4.0304509462906203e-07, "loss": 0.3328, "step": 5703 }, { "epoch": 2.6526119981398235, "grad_norm": 0.3193208575248718, "learning_rate": 4.0198140137132024e-07, "loss": 0.3034, "step": 5704 }, { "epoch": 2.6530770423190204, "grad_norm": 0.3008403778076172, "learning_rate": 4.0091905479853865e-07, "loss": 0.3229, "step": 5705 }, { "epoch": 2.6535420864982173, "grad_norm": 0.3564378023147583, "learning_rate": 3.9985805522186336e-07, "loss": 0.3812, "step": 5706 }, { "epoch": 2.6540071306774142, "grad_norm": 0.37547022104263306, "learning_rate": 3.98798402952042e-07, "loss": 0.332, "step": 5707 }, { "epoch": 2.654472174856611, "grad_norm": 0.2705591917037964, "learning_rate": 3.977400982994306e-07, "loss": 0.2647, "step": 5708 }, { "epoch": 2.6549372190358085, "grad_norm": 0.31391727924346924, "learning_rate": 3.966831415739891e-07, "loss": 0.326, "step": 5709 }, { "epoch": 2.6554022632150054, "grad_norm": 0.3211490213871002, "learning_rate": 3.9562753308528267e-07, "loss": 0.3346, "step": 5710 }, { "epoch": 2.6558673073942023, "grad_norm": 0.3080466687679291, "learning_rate": 3.945732731424823e-07, "loss": 0.3227, "step": 5711 }, { "epoch": 2.6563323515733996, "grad_norm": 0.3370213508605957, "learning_rate": 3.935203620543643e-07, "loss": 0.3306, "step": 5712 }, { "epoch": 2.6567973957525965, "grad_norm": 0.29656779766082764, "learning_rate": 3.924688001293081e-07, "loss": 0.2599, "step": 5713 }, { "epoch": 2.6572624399317935, "grad_norm": 0.3250420093536377, "learning_rate": 3.9141858767530014e-07, "loss": 0.3182, "step": 5714 }, { "epoch": 2.657727484110991, "grad_norm": 0.33750423789024353, "learning_rate": 3.903697249999289e-07, "loss": 0.3342, "step": 5715 }, { "epoch": 2.6581925282901877, "grad_norm": 0.29559361934661865, "learning_rate": 3.8932221241039125e-07, "loss": 0.295, "step": 5716 }, { "epoch": 2.6586575724693846, "grad_norm": 0.35010501742362976, "learning_rate": 3.882760502134847e-07, "loss": 0.328, "step": 5717 }, { "epoch": 2.6591226166485815, "grad_norm": 0.3221285343170166, "learning_rate": 3.872312387156146e-07, "loss": 0.3553, "step": 5718 }, { "epoch": 2.6595876608277784, "grad_norm": 0.3222235441207886, "learning_rate": 3.8618777822278854e-07, "loss": 0.3039, "step": 5719 }, { "epoch": 2.6600527050069758, "grad_norm": 0.32205820083618164, "learning_rate": 3.8514566904061967e-07, "loss": 0.3102, "step": 5720 }, { "epoch": 2.6605177491861727, "grad_norm": 0.33951982855796814, "learning_rate": 3.841049114743239e-07, "loss": 0.3002, "step": 5721 }, { "epoch": 2.6609827933653696, "grad_norm": 0.33085671067237854, "learning_rate": 3.8306550582872306e-07, "loss": 0.3237, "step": 5722 }, { "epoch": 2.661447837544567, "grad_norm": 0.3133414685726166, "learning_rate": 3.820274524082418e-07, "loss": 0.2978, "step": 5723 }, { "epoch": 2.661912881723764, "grad_norm": 0.3263706862926483, "learning_rate": 3.809907515169103e-07, "loss": 0.3129, "step": 5724 }, { "epoch": 2.6623779259029607, "grad_norm": 0.3518964350223541, "learning_rate": 3.7995540345835914e-07, "loss": 0.2998, "step": 5725 }, { "epoch": 2.6628429700821576, "grad_norm": 0.32105782628059387, "learning_rate": 3.7892140853582725e-07, "loss": 0.3411, "step": 5726 }, { "epoch": 2.663308014261355, "grad_norm": 0.30892109870910645, "learning_rate": 3.7788876705215307e-07, "loss": 0.3243, "step": 5727 }, { "epoch": 2.663773058440552, "grad_norm": 0.31716519594192505, "learning_rate": 3.7685747930978236e-07, "loss": 0.341, "step": 5728 }, { "epoch": 2.664238102619749, "grad_norm": 0.31774482131004333, "learning_rate": 3.758275456107613e-07, "loss": 0.2949, "step": 5729 }, { "epoch": 2.6647031467989457, "grad_norm": 0.31866157054901123, "learning_rate": 3.747989662567403e-07, "loss": 0.3215, "step": 5730 }, { "epoch": 2.665168190978143, "grad_norm": 0.33754652738571167, "learning_rate": 3.73771741548975e-07, "loss": 0.3054, "step": 5731 }, { "epoch": 2.66563323515734, "grad_norm": 0.3196556270122528, "learning_rate": 3.727458717883209e-07, "loss": 0.3075, "step": 5732 }, { "epoch": 2.666098279336537, "grad_norm": 0.356734037399292, "learning_rate": 3.717213572752404e-07, "loss": 0.3356, "step": 5733 }, { "epoch": 2.666563323515734, "grad_norm": 0.3005916178226471, "learning_rate": 3.706981983097957e-07, "loss": 0.3205, "step": 5734 }, { "epoch": 2.667028367694931, "grad_norm": 0.35259753465652466, "learning_rate": 3.6967639519165546e-07, "loss": 0.3188, "step": 5735 }, { "epoch": 2.667493411874128, "grad_norm": 0.3179108500480652, "learning_rate": 3.686559482200874e-07, "loss": 0.3535, "step": 5736 }, { "epoch": 2.667958456053325, "grad_norm": 0.3107699751853943, "learning_rate": 3.6763685769396484e-07, "loss": 0.2986, "step": 5737 }, { "epoch": 2.668423500232522, "grad_norm": 0.29539617896080017, "learning_rate": 3.6661912391176223e-07, "loss": 0.3016, "step": 5738 }, { "epoch": 2.668888544411719, "grad_norm": 0.3341010808944702, "learning_rate": 3.6560274717155784e-07, "loss": 0.3494, "step": 5739 }, { "epoch": 2.669353588590916, "grad_norm": 0.3309285342693329, "learning_rate": 3.645877277710308e-07, "loss": 0.295, "step": 5740 }, { "epoch": 2.669818632770113, "grad_norm": 0.3219027817249298, "learning_rate": 3.635740660074655e-07, "loss": 0.3116, "step": 5741 }, { "epoch": 2.6702836769493103, "grad_norm": 0.33891165256500244, "learning_rate": 3.6256176217774496e-07, "loss": 0.3293, "step": 5742 }, { "epoch": 2.6707487211285073, "grad_norm": 0.3448745608329773, "learning_rate": 3.6155081657835876e-07, "loss": 0.3291, "step": 5743 }, { "epoch": 2.671213765307704, "grad_norm": 0.32368507981300354, "learning_rate": 3.6054122950539447e-07, "loss": 0.3126, "step": 5744 }, { "epoch": 2.6716788094869015, "grad_norm": 0.3642202913761139, "learning_rate": 3.595330012545445e-07, "loss": 0.3264, "step": 5745 }, { "epoch": 2.6721438536660984, "grad_norm": 0.3226480782032013, "learning_rate": 3.5852613212110307e-07, "loss": 0.3025, "step": 5746 }, { "epoch": 2.6726088978452953, "grad_norm": 0.3831692039966583, "learning_rate": 3.5752062239996554e-07, "loss": 0.3591, "step": 5747 }, { "epoch": 2.6730739420244922, "grad_norm": 0.3260175585746765, "learning_rate": 3.5651647238562904e-07, "loss": 0.3103, "step": 5748 }, { "epoch": 2.673538986203689, "grad_norm": 0.3270534574985504, "learning_rate": 3.555136823721933e-07, "loss": 0.3214, "step": 5749 }, { "epoch": 2.6740040303828865, "grad_norm": 0.32291585206985474, "learning_rate": 3.545122526533579e-07, "loss": 0.3335, "step": 5750 }, { "epoch": 2.6744690745620834, "grad_norm": 0.2985171675682068, "learning_rate": 3.5351218352242755e-07, "loss": 0.3173, "step": 5751 }, { "epoch": 2.6749341187412803, "grad_norm": 0.32506510615348816, "learning_rate": 3.525134752723042e-07, "loss": 0.3106, "step": 5752 }, { "epoch": 2.6753991629204776, "grad_norm": 0.33014851808547974, "learning_rate": 3.515161281954943e-07, "loss": 0.3127, "step": 5753 }, { "epoch": 2.6758642070996745, "grad_norm": 0.33311840891838074, "learning_rate": 3.5052014258410426e-07, "loss": 0.3143, "step": 5754 }, { "epoch": 2.6763292512788714, "grad_norm": 0.3252905309200287, "learning_rate": 3.4952551872984295e-07, "loss": 0.3435, "step": 5755 }, { "epoch": 2.6767942954580684, "grad_norm": 0.31564247608184814, "learning_rate": 3.485322569240174e-07, "loss": 0.3095, "step": 5756 }, { "epoch": 2.6772593396372657, "grad_norm": 0.3435322642326355, "learning_rate": 3.475403574575398e-07, "loss": 0.3198, "step": 5757 }, { "epoch": 2.6777243838164626, "grad_norm": 0.3655998706817627, "learning_rate": 3.4654982062092113e-07, "loss": 0.3149, "step": 5758 }, { "epoch": 2.6781894279956595, "grad_norm": 0.32407233119010925, "learning_rate": 3.455606467042738e-07, "loss": 0.3186, "step": 5759 }, { "epoch": 2.6786544721748564, "grad_norm": 0.3063153624534607, "learning_rate": 3.445728359973094e-07, "loss": 0.2878, "step": 5760 }, { "epoch": 2.6791195163540538, "grad_norm": 0.35395288467407227, "learning_rate": 3.435863887893431e-07, "loss": 0.3123, "step": 5761 }, { "epoch": 2.6795845605332507, "grad_norm": 0.3261089026927948, "learning_rate": 3.426013053692878e-07, "loss": 0.3338, "step": 5762 }, { "epoch": 2.6800496047124476, "grad_norm": 0.3305519223213196, "learning_rate": 3.4161758602566043e-07, "loss": 0.3206, "step": 5763 }, { "epoch": 2.680514648891645, "grad_norm": 0.32225289940834045, "learning_rate": 3.406352310465749e-07, "loss": 0.3315, "step": 5764 }, { "epoch": 2.680979693070842, "grad_norm": 0.3169860243797302, "learning_rate": 3.3965424071974727e-07, "loss": 0.3072, "step": 5765 }, { "epoch": 2.6814447372500387, "grad_norm": 0.3510488271713257, "learning_rate": 3.386746153324943e-07, "loss": 0.3437, "step": 5766 }, { "epoch": 2.6819097814292356, "grad_norm": 0.3206140995025635, "learning_rate": 3.3769635517173103e-07, "loss": 0.2735, "step": 5767 }, { "epoch": 2.6823748256084325, "grad_norm": 0.33280014991760254, "learning_rate": 3.3671946052397486e-07, "loss": 0.3409, "step": 5768 }, { "epoch": 2.68283986978763, "grad_norm": 0.3153855502605438, "learning_rate": 3.3574393167534247e-07, "loss": 0.2849, "step": 5769 }, { "epoch": 2.683304913966827, "grad_norm": 0.3374989926815033, "learning_rate": 3.347697689115509e-07, "loss": 0.3465, "step": 5770 }, { "epoch": 2.6837699581460237, "grad_norm": 0.32558801770210266, "learning_rate": 3.337969725179152e-07, "loss": 0.2962, "step": 5771 }, { "epoch": 2.684235002325221, "grad_norm": 0.31771788001060486, "learning_rate": 3.328255427793531e-07, "loss": 0.3208, "step": 5772 }, { "epoch": 2.684700046504418, "grad_norm": 0.3207005560398102, "learning_rate": 3.318554799803786e-07, "loss": 0.3074, "step": 5773 }, { "epoch": 2.685165090683615, "grad_norm": 0.3172682225704193, "learning_rate": 3.3088678440511e-07, "loss": 0.3266, "step": 5774 }, { "epoch": 2.685630134862812, "grad_norm": 0.3359968960285187, "learning_rate": 3.299194563372604e-07, "loss": 0.2934, "step": 5775 }, { "epoch": 2.686095179042009, "grad_norm": 0.345907598733902, "learning_rate": 3.289534960601454e-07, "loss": 0.3541, "step": 5776 }, { "epoch": 2.686560223221206, "grad_norm": 0.328243225812912, "learning_rate": 3.279889038566786e-07, "loss": 0.3324, "step": 5777 }, { "epoch": 2.687025267400403, "grad_norm": 0.2853672206401825, "learning_rate": 3.2702568000937404e-07, "loss": 0.2756, "step": 5778 }, { "epoch": 2.6874903115796, "grad_norm": 0.32393786311149597, "learning_rate": 3.260638248003434e-07, "loss": 0.3193, "step": 5779 }, { "epoch": 2.687955355758797, "grad_norm": 0.3525981903076172, "learning_rate": 3.2510333851129895e-07, "loss": 0.3326, "step": 5780 }, { "epoch": 2.688420399937994, "grad_norm": 0.347851037979126, "learning_rate": 3.2414422142355184e-07, "loss": 0.3176, "step": 5781 }, { "epoch": 2.688885444117191, "grad_norm": 0.3218478858470917, "learning_rate": 3.2318647381801237e-07, "loss": 0.311, "step": 5782 }, { "epoch": 2.6893504882963883, "grad_norm": 0.32304203510284424, "learning_rate": 3.222300959751873e-07, "loss": 0.3275, "step": 5783 }, { "epoch": 2.6898155324755852, "grad_norm": 0.33753058314323425, "learning_rate": 3.2127508817518637e-07, "loss": 0.3146, "step": 5784 }, { "epoch": 2.690280576654782, "grad_norm": 0.336089164018631, "learning_rate": 3.2032145069771424e-07, "loss": 0.3564, "step": 5785 }, { "epoch": 2.6907456208339795, "grad_norm": 0.3043957054615021, "learning_rate": 3.1936918382207696e-07, "loss": 0.3022, "step": 5786 }, { "epoch": 2.6912106650131764, "grad_norm": 0.31595054268836975, "learning_rate": 3.1841828782717685e-07, "loss": 0.3224, "step": 5787 }, { "epoch": 2.6916757091923733, "grad_norm": 0.35378995537757874, "learning_rate": 3.174687629915174e-07, "loss": 0.3291, "step": 5788 }, { "epoch": 2.69214075337157, "grad_norm": 0.31758201122283936, "learning_rate": 3.165206095931972e-07, "loss": 0.2988, "step": 5789 }, { "epoch": 2.692605797550767, "grad_norm": 0.3146103024482727, "learning_rate": 3.1557382790991686e-07, "loss": 0.332, "step": 5790 }, { "epoch": 2.6930708417299645, "grad_norm": 0.32033929228782654, "learning_rate": 3.146284182189718e-07, "loss": 0.304, "step": 5791 }, { "epoch": 2.6935358859091614, "grad_norm": 0.33226868510246277, "learning_rate": 3.1368438079725784e-07, "loss": 0.3309, "step": 5792 }, { "epoch": 2.6940009300883583, "grad_norm": 0.32148411870002747, "learning_rate": 3.1274171592126814e-07, "loss": 0.3002, "step": 5793 }, { "epoch": 2.6944659742675556, "grad_norm": 0.30781638622283936, "learning_rate": 3.1180042386709463e-07, "loss": 0.3173, "step": 5794 }, { "epoch": 2.6949310184467525, "grad_norm": 0.31994858384132385, "learning_rate": 3.108605049104246e-07, "loss": 0.3421, "step": 5795 }, { "epoch": 2.6953960626259494, "grad_norm": 0.34452393651008606, "learning_rate": 3.099219593265479e-07, "loss": 0.3137, "step": 5796 }, { "epoch": 2.6958611068051463, "grad_norm": 0.30855563282966614, "learning_rate": 3.089847873903462e-07, "loss": 0.3054, "step": 5797 }, { "epoch": 2.6963261509843433, "grad_norm": 0.31589534878730774, "learning_rate": 3.0804898937630444e-07, "loss": 0.3194, "step": 5798 }, { "epoch": 2.6967911951635406, "grad_norm": 0.33643150329589844, "learning_rate": 3.0711456555850117e-07, "loss": 0.3233, "step": 5799 }, { "epoch": 2.6972562393427375, "grad_norm": 0.34752482175827026, "learning_rate": 3.0618151621061464e-07, "loss": 0.3242, "step": 5800 }, { "epoch": 2.6977212835219344, "grad_norm": 0.31614622473716736, "learning_rate": 3.0524984160591963e-07, "loss": 0.275, "step": 5801 }, { "epoch": 2.6981863277011318, "grad_norm": 0.33584392070770264, "learning_rate": 3.043195420172879e-07, "loss": 0.3686, "step": 5802 }, { "epoch": 2.6986513718803287, "grad_norm": 0.3025575280189514, "learning_rate": 3.033906177171897e-07, "loss": 0.2816, "step": 5803 }, { "epoch": 2.6991164160595256, "grad_norm": 0.30763640999794006, "learning_rate": 3.024630689776914e-07, "loss": 0.294, "step": 5804 }, { "epoch": 2.699581460238723, "grad_norm": 0.3446342647075653, "learning_rate": 3.015368960704584e-07, "loss": 0.3349, "step": 5805 }, { "epoch": 2.70004650441792, "grad_norm": 0.3446672558784485, "learning_rate": 3.006120992667499e-07, "loss": 0.369, "step": 5806 }, { "epoch": 2.7005115485971167, "grad_norm": 0.29590803384780884, "learning_rate": 2.9968867883742534e-07, "loss": 0.2653, "step": 5807 }, { "epoch": 2.7009765927763136, "grad_norm": 0.33212965726852417, "learning_rate": 2.9876663505293833e-07, "loss": 0.3127, "step": 5808 }, { "epoch": 2.7014416369555105, "grad_norm": 0.32688334584236145, "learning_rate": 2.978459681833412e-07, "loss": 0.3235, "step": 5809 }, { "epoch": 2.701906681134708, "grad_norm": 0.3344423174858093, "learning_rate": 2.969266784982822e-07, "loss": 0.3131, "step": 5810 }, { "epoch": 2.702371725313905, "grad_norm": 0.3305979371070862, "learning_rate": 2.9600876626700637e-07, "loss": 0.3075, "step": 5811 }, { "epoch": 2.7028367694931017, "grad_norm": 0.36284005641937256, "learning_rate": 2.9509223175835487e-07, "loss": 0.3072, "step": 5812 }, { "epoch": 2.703301813672299, "grad_norm": 0.3447756767272949, "learning_rate": 2.941770752407669e-07, "loss": 0.319, "step": 5813 }, { "epoch": 2.703766857851496, "grad_norm": 0.3431076109409332, "learning_rate": 2.9326329698227516e-07, "loss": 0.32, "step": 5814 }, { "epoch": 2.704231902030693, "grad_norm": 0.32309937477111816, "learning_rate": 2.923508972505118e-07, "loss": 0.3199, "step": 5815 }, { "epoch": 2.70469694620989, "grad_norm": 0.3181701600551605, "learning_rate": 2.9143987631270296e-07, "loss": 0.311, "step": 5816 }, { "epoch": 2.705161990389087, "grad_norm": 0.3445183038711548, "learning_rate": 2.905302344356742e-07, "loss": 0.3241, "step": 5817 }, { "epoch": 2.705627034568284, "grad_norm": 0.3397235870361328, "learning_rate": 2.8962197188584175e-07, "loss": 0.3536, "step": 5818 }, { "epoch": 2.706092078747481, "grad_norm": 0.3191192150115967, "learning_rate": 2.8871508892922286e-07, "loss": 0.3158, "step": 5819 }, { "epoch": 2.706557122926678, "grad_norm": 0.3246058225631714, "learning_rate": 2.878095858314278e-07, "loss": 0.3048, "step": 5820 }, { "epoch": 2.707022167105875, "grad_norm": 0.3111349940299988, "learning_rate": 2.869054628576651e-07, "loss": 0.3247, "step": 5821 }, { "epoch": 2.707487211285072, "grad_norm": 0.3155844211578369, "learning_rate": 2.860027202727361e-07, "loss": 0.329, "step": 5822 }, { "epoch": 2.707952255464269, "grad_norm": 0.2947717010974884, "learning_rate": 2.851013583410406e-07, "loss": 0.2873, "step": 5823 }, { "epoch": 2.7084172996434663, "grad_norm": 0.34042876958847046, "learning_rate": 2.8420137732657174e-07, "loss": 0.3402, "step": 5824 }, { "epoch": 2.7088823438226632, "grad_norm": 0.31821197271347046, "learning_rate": 2.833027774929209e-07, "loss": 0.3091, "step": 5825 }, { "epoch": 2.70934738800186, "grad_norm": 0.33676040172576904, "learning_rate": 2.824055591032715e-07, "loss": 0.3005, "step": 5826 }, { "epoch": 2.709812432181057, "grad_norm": 0.3211042582988739, "learning_rate": 2.81509722420405e-07, "loss": 0.3051, "step": 5827 }, { "epoch": 2.710277476360254, "grad_norm": 0.3290630280971527, "learning_rate": 2.8061526770669813e-07, "loss": 0.3338, "step": 5828 }, { "epoch": 2.7107425205394513, "grad_norm": 0.3298051953315735, "learning_rate": 2.7972219522412194e-07, "loss": 0.3061, "step": 5829 }, { "epoch": 2.711207564718648, "grad_norm": 0.3290122449398041, "learning_rate": 2.7883050523424214e-07, "loss": 0.3103, "step": 5830 }, { "epoch": 2.711672608897845, "grad_norm": 0.3085375130176544, "learning_rate": 2.779401979982216e-07, "loss": 0.3207, "step": 5831 }, { "epoch": 2.7121376530770425, "grad_norm": 0.3622879385948181, "learning_rate": 2.7705127377681494e-07, "loss": 0.3434, "step": 5832 }, { "epoch": 2.7126026972562394, "grad_norm": 0.30665135383605957, "learning_rate": 2.7616373283037514e-07, "loss": 0.2987, "step": 5833 }, { "epoch": 2.7130677414354363, "grad_norm": 0.31220006942749023, "learning_rate": 2.752775754188475e-07, "loss": 0.3643, "step": 5834 }, { "epoch": 2.7135327856146336, "grad_norm": 0.31486833095550537, "learning_rate": 2.743928018017744e-07, "loss": 0.3106, "step": 5835 }, { "epoch": 2.7139978297938305, "grad_norm": 0.33274805545806885, "learning_rate": 2.7350941223828975e-07, "loss": 0.3206, "step": 5836 }, { "epoch": 2.7144628739730274, "grad_norm": 0.3076656758785248, "learning_rate": 2.72627406987126e-07, "loss": 0.3123, "step": 5837 }, { "epoch": 2.7149279181522243, "grad_norm": 0.3071771264076233, "learning_rate": 2.71746786306607e-07, "loss": 0.3229, "step": 5838 }, { "epoch": 2.7153929623314212, "grad_norm": 0.32797494530677795, "learning_rate": 2.708675504546521e-07, "loss": 0.3368, "step": 5839 }, { "epoch": 2.7158580065106186, "grad_norm": 0.30586639046669006, "learning_rate": 2.699896996887763e-07, "loss": 0.2811, "step": 5840 }, { "epoch": 2.7163230506898155, "grad_norm": 0.31758421659469604, "learning_rate": 2.691132342660868e-07, "loss": 0.3155, "step": 5841 }, { "epoch": 2.7167880948690124, "grad_norm": 0.34208863973617554, "learning_rate": 2.682381544432866e-07, "loss": 0.3223, "step": 5842 }, { "epoch": 2.7172531390482098, "grad_norm": 0.29078277945518494, "learning_rate": 2.673644604766718e-07, "loss": 0.2754, "step": 5843 }, { "epoch": 2.7177181832274067, "grad_norm": 0.33728206157684326, "learning_rate": 2.664921526221348e-07, "loss": 0.3452, "step": 5844 }, { "epoch": 2.7181832274066036, "grad_norm": 0.29417553544044495, "learning_rate": 2.65621231135158e-07, "loss": 0.2996, "step": 5845 }, { "epoch": 2.718648271585801, "grad_norm": 0.36140957474708557, "learning_rate": 2.647516962708219e-07, "loss": 0.3438, "step": 5846 }, { "epoch": 2.719113315764998, "grad_norm": 0.30681052803993225, "learning_rate": 2.6388354828379813e-07, "loss": 0.3148, "step": 5847 }, { "epoch": 2.7195783599441947, "grad_norm": 0.31248223781585693, "learning_rate": 2.63016787428354e-07, "loss": 0.3289, "step": 5848 }, { "epoch": 2.7200434041233916, "grad_norm": 0.32591187953948975, "learning_rate": 2.621514139583492e-07, "loss": 0.3017, "step": 5849 }, { "epoch": 2.7205084483025885, "grad_norm": 0.32240256667137146, "learning_rate": 2.612874281272371e-07, "loss": 0.3067, "step": 5850 }, { "epoch": 2.720973492481786, "grad_norm": 0.3212602138519287, "learning_rate": 2.6042483018806577e-07, "loss": 0.2961, "step": 5851 }, { "epoch": 2.721438536660983, "grad_norm": 0.35369014739990234, "learning_rate": 2.595636203934765e-07, "loss": 0.3466, "step": 5852 }, { "epoch": 2.7219035808401797, "grad_norm": 0.3174188435077667, "learning_rate": 2.587037989957031e-07, "loss": 0.3181, "step": 5853 }, { "epoch": 2.722368625019377, "grad_norm": 0.356917142868042, "learning_rate": 2.5784536624657354e-07, "loss": 0.3176, "step": 5854 }, { "epoch": 2.722833669198574, "grad_norm": 0.36071768403053284, "learning_rate": 2.569883223975078e-07, "loss": 0.3259, "step": 5855 }, { "epoch": 2.723298713377771, "grad_norm": 0.3190617561340332, "learning_rate": 2.5613266769952183e-07, "loss": 0.3215, "step": 5856 }, { "epoch": 2.7237637575569678, "grad_norm": 0.31165647506713867, "learning_rate": 2.552784024032218e-07, "loss": 0.2974, "step": 5857 }, { "epoch": 2.7242288017361647, "grad_norm": 0.32557412981987, "learning_rate": 2.544255267588086e-07, "loss": 0.3313, "step": 5858 }, { "epoch": 2.724693845915362, "grad_norm": 0.329074889421463, "learning_rate": 2.535740410160753e-07, "loss": 0.3183, "step": 5859 }, { "epoch": 2.725158890094559, "grad_norm": 0.3058283030986786, "learning_rate": 2.5272394542440847e-07, "loss": 0.3057, "step": 5860 }, { "epoch": 2.725623934273756, "grad_norm": 0.3319613039493561, "learning_rate": 2.518752402327873e-07, "loss": 0.2884, "step": 5861 }, { "epoch": 2.726088978452953, "grad_norm": 0.3359425663948059, "learning_rate": 2.5102792568978354e-07, "loss": 0.3352, "step": 5862 }, { "epoch": 2.72655402263215, "grad_norm": 0.32430392503738403, "learning_rate": 2.501820020435619e-07, "loss": 0.341, "step": 5863 }, { "epoch": 2.727019066811347, "grad_norm": 0.35566022992134094, "learning_rate": 2.4933746954188045e-07, "loss": 0.2976, "step": 5864 }, { "epoch": 2.7274841109905443, "grad_norm": 0.3226439356803894, "learning_rate": 2.4849432843208786e-07, "loss": 0.3071, "step": 5865 }, { "epoch": 2.7279491551697412, "grad_norm": 0.3440340757369995, "learning_rate": 2.476525789611278e-07, "loss": 0.3447, "step": 5866 }, { "epoch": 2.728414199348938, "grad_norm": 0.32926130294799805, "learning_rate": 2.4681222137553304e-07, "loss": 0.3287, "step": 5867 }, { "epoch": 2.728879243528135, "grad_norm": 0.30582261085510254, "learning_rate": 2.4597325592143285e-07, "loss": 0.3176, "step": 5868 }, { "epoch": 2.729344287707332, "grad_norm": 0.3010387718677521, "learning_rate": 2.4513568284454504e-07, "loss": 0.3091, "step": 5869 }, { "epoch": 2.7298093318865293, "grad_norm": 0.3204473555088043, "learning_rate": 2.4429950239018285e-07, "loss": 0.3201, "step": 5870 }, { "epoch": 2.730274376065726, "grad_norm": 0.31000176072120667, "learning_rate": 2.4346471480324763e-07, "loss": 0.3046, "step": 5871 }, { "epoch": 2.730739420244923, "grad_norm": 0.323900431394577, "learning_rate": 2.4263132032823656e-07, "loss": 0.3215, "step": 5872 }, { "epoch": 2.7312044644241205, "grad_norm": 0.34527266025543213, "learning_rate": 2.417993192092372e-07, "loss": 0.3417, "step": 5873 }, { "epoch": 2.7316695086033174, "grad_norm": 0.3277050852775574, "learning_rate": 2.409687116899284e-07, "loss": 0.3255, "step": 5874 }, { "epoch": 2.7321345527825143, "grad_norm": 0.32256096601486206, "learning_rate": 2.401394980135835e-07, "loss": 0.301, "step": 5875 }, { "epoch": 2.7325995969617116, "grad_norm": 0.3484511971473694, "learning_rate": 2.3931167842306314e-07, "loss": 0.3085, "step": 5876 }, { "epoch": 2.7330646411409085, "grad_norm": 0.317190557718277, "learning_rate": 2.3848525316082503e-07, "loss": 0.3135, "step": 5877 }, { "epoch": 2.7335296853201054, "grad_norm": 0.3190753757953644, "learning_rate": 2.3766022246891284e-07, "loss": 0.3283, "step": 5878 }, { "epoch": 2.7339947294993023, "grad_norm": 0.3363284468650818, "learning_rate": 2.3683658658896713e-07, "loss": 0.3127, "step": 5879 }, { "epoch": 2.7344597736784992, "grad_norm": 0.32483959197998047, "learning_rate": 2.3601434576221548e-07, "loss": 0.3224, "step": 5880 }, { "epoch": 2.7349248178576966, "grad_norm": 0.29627302289009094, "learning_rate": 2.3519350022948083e-07, "loss": 0.3114, "step": 5881 }, { "epoch": 2.7353898620368935, "grad_norm": 0.32418790459632874, "learning_rate": 2.3437405023117366e-07, "loss": 0.3299, "step": 5882 }, { "epoch": 2.7358549062160904, "grad_norm": 0.33903124928474426, "learning_rate": 2.3355599600729916e-07, "loss": 0.325, "step": 5883 }, { "epoch": 2.7363199503952877, "grad_norm": 0.2996440827846527, "learning_rate": 2.3273933779745016e-07, "loss": 0.2877, "step": 5884 }, { "epoch": 2.7367849945744847, "grad_norm": 0.3519968092441559, "learning_rate": 2.3192407584081423e-07, "loss": 0.3545, "step": 5885 }, { "epoch": 2.7372500387536816, "grad_norm": 0.3154807984828949, "learning_rate": 2.3111021037616755e-07, "loss": 0.2675, "step": 5886 }, { "epoch": 2.7377150829328785, "grad_norm": 0.33764511346817017, "learning_rate": 2.3029774164187945e-07, "loss": 0.3311, "step": 5887 }, { "epoch": 2.738180127112076, "grad_norm": 0.31895461678504944, "learning_rate": 2.2948666987590683e-07, "loss": 0.2989, "step": 5888 }, { "epoch": 2.7386451712912727, "grad_norm": 0.30818265676498413, "learning_rate": 2.2867699531580134e-07, "loss": 0.2961, "step": 5889 }, { "epoch": 2.7391102154704696, "grad_norm": 0.32495778799057007, "learning_rate": 2.278687181987016e-07, "loss": 0.3442, "step": 5890 }, { "epoch": 2.7395752596496665, "grad_norm": 0.3362978994846344, "learning_rate": 2.2706183876134047e-07, "loss": 0.3452, "step": 5891 }, { "epoch": 2.740040303828864, "grad_norm": 0.32582196593284607, "learning_rate": 2.262563572400389e-07, "loss": 0.3089, "step": 5892 }, { "epoch": 2.740505348008061, "grad_norm": 0.30926793813705444, "learning_rate": 2.2545227387070988e-07, "loss": 0.293, "step": 5893 }, { "epoch": 2.7409703921872577, "grad_norm": 0.3550074100494385, "learning_rate": 2.2464958888885613e-07, "loss": 0.3285, "step": 5894 }, { "epoch": 2.741435436366455, "grad_norm": 0.30779311060905457, "learning_rate": 2.2384830252957068e-07, "loss": 0.3237, "step": 5895 }, { "epoch": 2.741900480545652, "grad_norm": 0.32127946615219116, "learning_rate": 2.2304841502753804e-07, "loss": 0.3402, "step": 5896 }, { "epoch": 2.742365524724849, "grad_norm": 0.3655702471733093, "learning_rate": 2.2224992661703139e-07, "loss": 0.3244, "step": 5897 }, { "epoch": 2.7428305689040458, "grad_norm": 0.3156966269016266, "learning_rate": 2.2145283753191526e-07, "loss": 0.2846, "step": 5898 }, { "epoch": 2.7432956130832427, "grad_norm": 0.32252970337867737, "learning_rate": 2.206571480056452e-07, "loss": 0.3368, "step": 5899 }, { "epoch": 2.74376065726244, "grad_norm": 0.33229538798332214, "learning_rate": 2.1986285827126418e-07, "loss": 0.3075, "step": 5900 }, { "epoch": 2.744225701441637, "grad_norm": 0.33600881695747375, "learning_rate": 2.1906996856140783e-07, "loss": 0.334, "step": 5901 }, { "epoch": 2.744690745620834, "grad_norm": 0.4041096270084381, "learning_rate": 2.1827847910830034e-07, "loss": 0.3149, "step": 5902 }, { "epoch": 2.745155789800031, "grad_norm": 0.3284103572368622, "learning_rate": 2.1748839014375632e-07, "loss": 0.3283, "step": 5903 }, { "epoch": 2.745620833979228, "grad_norm": 0.2934693396091461, "learning_rate": 2.16699701899179e-07, "loss": 0.2902, "step": 5904 }, { "epoch": 2.746085878158425, "grad_norm": 0.3179258108139038, "learning_rate": 2.1591241460556355e-07, "loss": 0.3039, "step": 5905 }, { "epoch": 2.7465509223376223, "grad_norm": 0.32590535283088684, "learning_rate": 2.151265284934928e-07, "loss": 0.3358, "step": 5906 }, { "epoch": 2.7470159665168192, "grad_norm": 0.3357870280742645, "learning_rate": 2.143420437931415e-07, "loss": 0.3274, "step": 5907 }, { "epoch": 2.747481010696016, "grad_norm": 0.3348116874694824, "learning_rate": 2.1355896073427028e-07, "loss": 0.2949, "step": 5908 }, { "epoch": 2.747946054875213, "grad_norm": 0.31171727180480957, "learning_rate": 2.127772795462324e-07, "loss": 0.3113, "step": 5909 }, { "epoch": 2.74841109905441, "grad_norm": 0.30832639336586, "learning_rate": 2.1199700045797077e-07, "loss": 0.3246, "step": 5910 }, { "epoch": 2.7488761432336073, "grad_norm": 0.3134484887123108, "learning_rate": 2.112181236980143e-07, "loss": 0.3223, "step": 5911 }, { "epoch": 2.749341187412804, "grad_norm": 0.32488688826560974, "learning_rate": 2.104406494944855e-07, "loss": 0.3119, "step": 5912 }, { "epoch": 2.749806231592001, "grad_norm": 0.3216099739074707, "learning_rate": 2.0966457807509222e-07, "loss": 0.3364, "step": 5913 }, { "epoch": 2.7502712757711985, "grad_norm": 0.32613542675971985, "learning_rate": 2.088899096671343e-07, "loss": 0.3025, "step": 5914 }, { "epoch": 2.7507363199503954, "grad_norm": 0.32675376534461975, "learning_rate": 2.0811664449749857e-07, "loss": 0.3239, "step": 5915 }, { "epoch": 2.7512013641295923, "grad_norm": 0.28670793771743774, "learning_rate": 2.073447827926628e-07, "loss": 0.2655, "step": 5916 }, { "epoch": 2.751666408308789, "grad_norm": 0.31409409642219543, "learning_rate": 2.0657432477869165e-07, "loss": 0.3476, "step": 5917 }, { "epoch": 2.7521314524879865, "grad_norm": 0.3178742229938507, "learning_rate": 2.0580527068124134e-07, "loss": 0.3484, "step": 5918 }, { "epoch": 2.7525964966671834, "grad_norm": 0.3407832980155945, "learning_rate": 2.0503762072555387e-07, "loss": 0.329, "step": 5919 }, { "epoch": 2.7530615408463803, "grad_norm": 0.3183245360851288, "learning_rate": 2.0427137513646167e-07, "loss": 0.3063, "step": 5920 }, { "epoch": 2.7535265850255772, "grad_norm": 0.3081268072128296, "learning_rate": 2.0350653413838573e-07, "loss": 0.3044, "step": 5921 }, { "epoch": 2.7539916292047746, "grad_norm": 0.31228625774383545, "learning_rate": 2.0274309795533687e-07, "loss": 0.3156, "step": 5922 }, { "epoch": 2.7544566733839715, "grad_norm": 0.3150801956653595, "learning_rate": 2.0198106681091124e-07, "loss": 0.3221, "step": 5923 }, { "epoch": 2.7549217175631684, "grad_norm": 0.2927737534046173, "learning_rate": 2.012204409282964e-07, "loss": 0.2869, "step": 5924 }, { "epoch": 2.7553867617423657, "grad_norm": 0.32298728823661804, "learning_rate": 2.0046122053026697e-07, "loss": 0.3349, "step": 5925 }, { "epoch": 2.7558518059215626, "grad_norm": 0.34636053442955017, "learning_rate": 1.9970340583918668e-07, "loss": 0.33, "step": 5926 }, { "epoch": 2.7563168501007596, "grad_norm": 0.3538902699947357, "learning_rate": 1.989469970770064e-07, "loss": 0.3174, "step": 5927 }, { "epoch": 2.7567818942799565, "grad_norm": 0.32646122574806213, "learning_rate": 1.9819199446526716e-07, "loss": 0.2947, "step": 5928 }, { "epoch": 2.7572469384591534, "grad_norm": 0.35514765977859497, "learning_rate": 1.9743839822509547e-07, "loss": 0.3108, "step": 5929 }, { "epoch": 2.7577119826383507, "grad_norm": 0.31268438696861267, "learning_rate": 1.9668620857720865e-07, "loss": 0.3144, "step": 5930 }, { "epoch": 2.7581770268175476, "grad_norm": 0.2988133728504181, "learning_rate": 1.9593542574190993e-07, "loss": 0.2633, "step": 5931 }, { "epoch": 2.7586420709967445, "grad_norm": 0.3349534869194031, "learning_rate": 1.9518604993909175e-07, "loss": 0.3689, "step": 5932 }, { "epoch": 2.759107115175942, "grad_norm": 0.3589261472225189, "learning_rate": 1.9443808138823404e-07, "loss": 0.3216, "step": 5933 }, { "epoch": 2.7595721593551388, "grad_norm": 0.3319076895713806, "learning_rate": 1.9369152030840553e-07, "loss": 0.3064, "step": 5934 }, { "epoch": 2.7600372035343357, "grad_norm": 0.30595728754997253, "learning_rate": 1.9294636691826073e-07, "loss": 0.3035, "step": 5935 }, { "epoch": 2.760502247713533, "grad_norm": 0.32479432225227356, "learning_rate": 1.9220262143604395e-07, "loss": 0.317, "step": 5936 }, { "epoch": 2.76096729189273, "grad_norm": 0.33985117077827454, "learning_rate": 1.9146028407958483e-07, "loss": 0.3118, "step": 5937 }, { "epoch": 2.761432336071927, "grad_norm": 0.32750189304351807, "learning_rate": 1.907193550663028e-07, "loss": 0.3164, "step": 5938 }, { "epoch": 2.7618973802511237, "grad_norm": 0.3345829248428345, "learning_rate": 1.899798346132037e-07, "loss": 0.3506, "step": 5939 }, { "epoch": 2.7623624244303207, "grad_norm": 0.31959566473960876, "learning_rate": 1.8924172293688148e-07, "loss": 0.2971, "step": 5940 }, { "epoch": 2.762827468609518, "grad_norm": 0.3132396638393402, "learning_rate": 1.885050202535166e-07, "loss": 0.2998, "step": 5941 }, { "epoch": 2.763292512788715, "grad_norm": 0.3381306231021881, "learning_rate": 1.877697267788775e-07, "loss": 0.3455, "step": 5942 }, { "epoch": 2.763757556967912, "grad_norm": 0.3132924735546112, "learning_rate": 1.870358427283192e-07, "loss": 0.3172, "step": 5943 }, { "epoch": 2.764222601147109, "grad_norm": 0.303196519613266, "learning_rate": 1.8630336831678475e-07, "loss": 0.3258, "step": 5944 }, { "epoch": 2.764687645326306, "grad_norm": 0.3348020017147064, "learning_rate": 1.8557230375880364e-07, "loss": 0.3243, "step": 5945 }, { "epoch": 2.765152689505503, "grad_norm": 0.33610332012176514, "learning_rate": 1.848426492684946e-07, "loss": 0.362, "step": 5946 }, { "epoch": 2.7656177336847003, "grad_norm": 0.31693512201309204, "learning_rate": 1.8411440505956e-07, "loss": 0.2884, "step": 5947 }, { "epoch": 2.7660827778638972, "grad_norm": 0.3176809847354889, "learning_rate": 1.833875713452904e-07, "loss": 0.3094, "step": 5948 }, { "epoch": 2.766547822043094, "grad_norm": 0.3155810534954071, "learning_rate": 1.8266214833856432e-07, "loss": 0.3281, "step": 5949 }, { "epoch": 2.767012866222291, "grad_norm": 0.31794315576553345, "learning_rate": 1.819381362518463e-07, "loss": 0.3407, "step": 5950 }, { "epoch": 2.767477910401488, "grad_norm": 0.2952190935611725, "learning_rate": 1.8121553529718782e-07, "loss": 0.296, "step": 5951 }, { "epoch": 2.7679429545806853, "grad_norm": 0.32220354676246643, "learning_rate": 1.8049434568622627e-07, "loss": 0.3271, "step": 5952 }, { "epoch": 2.768407998759882, "grad_norm": 0.3344385027885437, "learning_rate": 1.7977456763018764e-07, "loss": 0.321, "step": 5953 }, { "epoch": 2.768873042939079, "grad_norm": 0.330684095621109, "learning_rate": 1.7905620133988166e-07, "loss": 0.3028, "step": 5954 }, { "epoch": 2.7693380871182764, "grad_norm": 0.31999465823173523, "learning_rate": 1.7833924702570725e-07, "loss": 0.3465, "step": 5955 }, { "epoch": 2.7698031312974734, "grad_norm": 0.28892767429351807, "learning_rate": 1.7762370489764813e-07, "loss": 0.3031, "step": 5956 }, { "epoch": 2.7702681754766703, "grad_norm": 0.3207509219646454, "learning_rate": 1.7690957516527607e-07, "loss": 0.322, "step": 5957 }, { "epoch": 2.770733219655867, "grad_norm": 0.32161515951156616, "learning_rate": 1.76196858037746e-07, "loss": 0.3114, "step": 5958 }, { "epoch": 2.771198263835064, "grad_norm": 0.3222302496433258, "learning_rate": 1.7548555372380372e-07, "loss": 0.3151, "step": 5959 }, { "epoch": 2.7716633080142614, "grad_norm": 0.3225840926170349, "learning_rate": 1.7477566243177647e-07, "loss": 0.301, "step": 5960 }, { "epoch": 2.7721283521934583, "grad_norm": 0.33795180916786194, "learning_rate": 1.740671843695818e-07, "loss": 0.3138, "step": 5961 }, { "epoch": 2.7725933963726552, "grad_norm": 0.3079066574573517, "learning_rate": 1.7336011974471933e-07, "loss": 0.3115, "step": 5962 }, { "epoch": 2.7730584405518526, "grad_norm": 0.317829430103302, "learning_rate": 1.7265446876427895e-07, "loss": 0.3104, "step": 5963 }, { "epoch": 2.7735234847310495, "grad_norm": 0.32310232520103455, "learning_rate": 1.7195023163493253e-07, "loss": 0.318, "step": 5964 }, { "epoch": 2.7739885289102464, "grad_norm": 0.3459528386592865, "learning_rate": 1.712474085629412e-07, "loss": 0.3189, "step": 5965 }, { "epoch": 2.7744535730894437, "grad_norm": 0.3017194867134094, "learning_rate": 1.7054599975414866e-07, "loss": 0.3033, "step": 5966 }, { "epoch": 2.7749186172686406, "grad_norm": 0.32311463356018066, "learning_rate": 1.6984600541398777e-07, "loss": 0.3371, "step": 5967 }, { "epoch": 2.7753836614478375, "grad_norm": 0.29081717133522034, "learning_rate": 1.6914742574747455e-07, "loss": 0.3338, "step": 5968 }, { "epoch": 2.7758487056270345, "grad_norm": 0.3164746165275574, "learning_rate": 1.6845026095921314e-07, "loss": 0.3281, "step": 5969 }, { "epoch": 2.7763137498062314, "grad_norm": 0.3146812915802002, "learning_rate": 1.677545112533896e-07, "loss": 0.3042, "step": 5970 }, { "epoch": 2.7767787939854287, "grad_norm": 0.3127114474773407, "learning_rate": 1.6706017683377928e-07, "loss": 0.2914, "step": 5971 }, { "epoch": 2.7772438381646256, "grad_norm": 0.3269754946231842, "learning_rate": 1.663672579037412e-07, "loss": 0.3242, "step": 5972 }, { "epoch": 2.7777088823438225, "grad_norm": 0.323904424905777, "learning_rate": 1.6567575466621964e-07, "loss": 0.2977, "step": 5973 }, { "epoch": 2.77817392652302, "grad_norm": 0.3124120235443115, "learning_rate": 1.6498566732374433e-07, "loss": 0.328, "step": 5974 }, { "epoch": 2.7786389707022168, "grad_norm": 0.31669923663139343, "learning_rate": 1.6429699607843185e-07, "loss": 0.3143, "step": 5975 }, { "epoch": 2.7791040148814137, "grad_norm": 0.3284464478492737, "learning_rate": 1.6360974113198203e-07, "loss": 0.3065, "step": 5976 }, { "epoch": 2.779569059060611, "grad_norm": 0.31875741481781006, "learning_rate": 1.6292390268568103e-07, "loss": 0.316, "step": 5977 }, { "epoch": 2.780034103239808, "grad_norm": 0.34978044033050537, "learning_rate": 1.6223948094039876e-07, "loss": 0.3321, "step": 5978 }, { "epoch": 2.780499147419005, "grad_norm": 0.32049715518951416, "learning_rate": 1.615564760965921e-07, "loss": 0.3292, "step": 5979 }, { "epoch": 2.7809641915982017, "grad_norm": 0.30878329277038574, "learning_rate": 1.6087488835430208e-07, "loss": 0.3102, "step": 5980 }, { "epoch": 2.7814292357773986, "grad_norm": 0.3212791979312897, "learning_rate": 1.6019471791315522e-07, "loss": 0.2984, "step": 5981 }, { "epoch": 2.781894279956596, "grad_norm": 0.3322344124317169, "learning_rate": 1.5951596497236154e-07, "loss": 0.3274, "step": 5982 }, { "epoch": 2.782359324135793, "grad_norm": 0.3315823972225189, "learning_rate": 1.5883862973071652e-07, "loss": 0.3367, "step": 5983 }, { "epoch": 2.78282436831499, "grad_norm": 0.33415260910987854, "learning_rate": 1.5816271238660196e-07, "loss": 0.3106, "step": 5984 }, { "epoch": 2.783289412494187, "grad_norm": 0.31730806827545166, "learning_rate": 1.5748821313798124e-07, "loss": 0.3076, "step": 5985 }, { "epoch": 2.783754456673384, "grad_norm": 0.3219851553440094, "learning_rate": 1.5681513218240573e-07, "loss": 0.3218, "step": 5986 }, { "epoch": 2.784219500852581, "grad_norm": 0.30945420265197754, "learning_rate": 1.5614346971700945e-07, "loss": 0.3242, "step": 5987 }, { "epoch": 2.784684545031778, "grad_norm": 0.3280915915966034, "learning_rate": 1.554732259385111e-07, "loss": 0.3007, "step": 5988 }, { "epoch": 2.7851495892109748, "grad_norm": 0.320148229598999, "learning_rate": 1.5480440104321481e-07, "loss": 0.3254, "step": 5989 }, { "epoch": 2.785614633390172, "grad_norm": 0.31728118658065796, "learning_rate": 1.5413699522700775e-07, "loss": 0.3202, "step": 5990 }, { "epoch": 2.786079677569369, "grad_norm": 0.30805543065071106, "learning_rate": 1.5347100868536246e-07, "loss": 0.3265, "step": 5991 }, { "epoch": 2.786544721748566, "grad_norm": 0.3176276385784149, "learning_rate": 1.5280644161333625e-07, "loss": 0.3275, "step": 5992 }, { "epoch": 2.7870097659277633, "grad_norm": 0.3044149577617645, "learning_rate": 1.521432942055695e-07, "loss": 0.297, "step": 5993 }, { "epoch": 2.78747481010696, "grad_norm": 0.3212355971336365, "learning_rate": 1.51481566656288e-07, "loss": 0.3568, "step": 5994 }, { "epoch": 2.787939854286157, "grad_norm": 0.3032307028770447, "learning_rate": 1.5082125915929946e-07, "loss": 0.3037, "step": 5995 }, { "epoch": 2.7884048984653544, "grad_norm": 0.3063505291938782, "learning_rate": 1.5016237190799866e-07, "loss": 0.2971, "step": 5996 }, { "epoch": 2.7888699426445513, "grad_norm": 0.2914872169494629, "learning_rate": 1.4950490509536176e-07, "loss": 0.313, "step": 5997 }, { "epoch": 2.7893349868237483, "grad_norm": 0.3118445575237274, "learning_rate": 1.4884885891395196e-07, "loss": 0.3192, "step": 5998 }, { "epoch": 2.789800031002945, "grad_norm": 0.33914464712142944, "learning_rate": 1.4819423355591223e-07, "loss": 0.348, "step": 5999 }, { "epoch": 2.790265075182142, "grad_norm": 0.32917240262031555, "learning_rate": 1.4754102921297363e-07, "loss": 0.3033, "step": 6000 }, { "epoch": 2.7907301193613394, "grad_norm": 0.33126407861709595, "learning_rate": 1.4688924607644817e-07, "loss": 0.312, "step": 6001 }, { "epoch": 2.7911951635405363, "grad_norm": 0.3238784372806549, "learning_rate": 1.46238884337232e-07, "loss": 0.3091, "step": 6002 }, { "epoch": 2.7916602077197332, "grad_norm": 0.32593873143196106, "learning_rate": 1.4558994418580663e-07, "loss": 0.3353, "step": 6003 }, { "epoch": 2.7921252518989306, "grad_norm": 0.2928268611431122, "learning_rate": 1.4494242581223615e-07, "loss": 0.2997, "step": 6004 }, { "epoch": 2.7925902960781275, "grad_norm": 0.3363851308822632, "learning_rate": 1.4429632940616721e-07, "loss": 0.3243, "step": 6005 }, { "epoch": 2.7930553402573244, "grad_norm": 0.3205671012401581, "learning_rate": 1.4365165515683176e-07, "loss": 0.3337, "step": 6006 }, { "epoch": 2.7935203844365217, "grad_norm": 0.3109828233718872, "learning_rate": 1.4300840325304377e-07, "loss": 0.2985, "step": 6007 }, { "epoch": 2.7939854286157186, "grad_norm": 0.29912734031677246, "learning_rate": 1.4236657388320198e-07, "loss": 0.3093, "step": 6008 }, { "epoch": 2.7944504727949155, "grad_norm": 0.3295075297355652, "learning_rate": 1.417261672352871e-07, "loss": 0.3717, "step": 6009 }, { "epoch": 2.7949155169741124, "grad_norm": 0.29881277680397034, "learning_rate": 1.4108718349686468e-07, "loss": 0.3031, "step": 6010 }, { "epoch": 2.7953805611533094, "grad_norm": 0.32853347063064575, "learning_rate": 1.4044962285508113e-07, "loss": 0.3193, "step": 6011 }, { "epoch": 2.7958456053325067, "grad_norm": 0.32658717036247253, "learning_rate": 1.3981348549666928e-07, "loss": 0.3161, "step": 6012 }, { "epoch": 2.7963106495117036, "grad_norm": 0.3037392795085907, "learning_rate": 1.3917877160794236e-07, "loss": 0.3352, "step": 6013 }, { "epoch": 2.7967756936909005, "grad_norm": 0.3436054289340973, "learning_rate": 1.385454813747983e-07, "loss": 0.328, "step": 6014 }, { "epoch": 2.797240737870098, "grad_norm": 0.2963773012161255, "learning_rate": 1.3791361498271704e-07, "loss": 0.2987, "step": 6015 }, { "epoch": 2.7977057820492948, "grad_norm": 0.3178195655345917, "learning_rate": 1.3728317261676338e-07, "loss": 0.3486, "step": 6016 }, { "epoch": 2.7981708262284917, "grad_norm": 0.39669740200042725, "learning_rate": 1.3665415446158182e-07, "loss": 0.3288, "step": 6017 }, { "epoch": 2.7986358704076886, "grad_norm": 0.3178330659866333, "learning_rate": 1.3602656070140275e-07, "loss": 0.3005, "step": 6018 }, { "epoch": 2.7991009145868855, "grad_norm": 0.31485146284103394, "learning_rate": 1.354003915200375e-07, "loss": 0.3319, "step": 6019 }, { "epoch": 2.799565958766083, "grad_norm": 0.31439441442489624, "learning_rate": 1.3477564710088097e-07, "loss": 0.2969, "step": 6020 }, { "epoch": 2.8000310029452797, "grad_norm": 0.3106626570224762, "learning_rate": 1.3415232762691134e-07, "loss": 0.3352, "step": 6021 }, { "epoch": 2.8004960471244766, "grad_norm": 0.3026930093765259, "learning_rate": 1.335304332806875e-07, "loss": 0.312, "step": 6022 }, { "epoch": 2.800961091303674, "grad_norm": 0.31321385502815247, "learning_rate": 1.3290996424435375e-07, "loss": 0.2761, "step": 6023 }, { "epoch": 2.801426135482871, "grad_norm": 0.32276248931884766, "learning_rate": 1.3229092069963368e-07, "loss": 0.3453, "step": 6024 }, { "epoch": 2.801891179662068, "grad_norm": 0.3255274295806885, "learning_rate": 1.3167330282783608e-07, "loss": 0.3046, "step": 6025 }, { "epoch": 2.802356223841265, "grad_norm": 0.32487818598747253, "learning_rate": 1.3105711080985128e-07, "loss": 0.3285, "step": 6026 }, { "epoch": 2.802821268020462, "grad_norm": 0.43458884954452515, "learning_rate": 1.3044234482615216e-07, "loss": 0.3202, "step": 6027 }, { "epoch": 2.803286312199659, "grad_norm": 0.32257169485092163, "learning_rate": 1.298290050567924e-07, "loss": 0.3324, "step": 6028 }, { "epoch": 2.803751356378856, "grad_norm": 0.3104286193847656, "learning_rate": 1.2921709168141116e-07, "loss": 0.3087, "step": 6029 }, { "epoch": 2.8042164005580528, "grad_norm": 0.3442256450653076, "learning_rate": 1.2860660487922616e-07, "loss": 0.3041, "step": 6030 }, { "epoch": 2.80468144473725, "grad_norm": 0.33402782678604126, "learning_rate": 1.2799754482903992e-07, "loss": 0.3278, "step": 6031 }, { "epoch": 2.805146488916447, "grad_norm": 0.3222576379776001, "learning_rate": 1.2738991170923588e-07, "loss": 0.2998, "step": 6032 }, { "epoch": 2.805611533095644, "grad_norm": 0.3217984139919281, "learning_rate": 1.2678370569778052e-07, "loss": 0.3382, "step": 6033 }, { "epoch": 2.8060765772748413, "grad_norm": 0.3216967284679413, "learning_rate": 1.2617892697222135e-07, "loss": 0.3328, "step": 6034 }, { "epoch": 2.806541621454038, "grad_norm": 0.3223290741443634, "learning_rate": 1.2557557570968825e-07, "loss": 0.3037, "step": 6035 }, { "epoch": 2.807006665633235, "grad_norm": 0.3248218894004822, "learning_rate": 1.2497365208689272e-07, "loss": 0.3154, "step": 6036 }, { "epoch": 2.8074717098124324, "grad_norm": 0.2976897358894348, "learning_rate": 1.2437315628012868e-07, "loss": 0.2948, "step": 6037 }, { "epoch": 2.8079367539916293, "grad_norm": 0.34811344742774963, "learning_rate": 1.2377408846527105e-07, "loss": 0.3377, "step": 6038 }, { "epoch": 2.8084017981708262, "grad_norm": 0.3315483033657074, "learning_rate": 1.231764488177789e-07, "loss": 0.3402, "step": 6039 }, { "epoch": 2.808866842350023, "grad_norm": 0.3172501027584076, "learning_rate": 1.225802375126889e-07, "loss": 0.3076, "step": 6040 }, { "epoch": 2.80933188652922, "grad_norm": 0.3447789251804352, "learning_rate": 1.2198545472462297e-07, "loss": 0.3221, "step": 6041 }, { "epoch": 2.8097969307084174, "grad_norm": 0.3011327385902405, "learning_rate": 1.2139210062778294e-07, "loss": 0.3261, "step": 6042 }, { "epoch": 2.8102619748876143, "grad_norm": 0.2930125594139099, "learning_rate": 1.2080017539595312e-07, "loss": 0.294, "step": 6043 }, { "epoch": 2.810727019066811, "grad_norm": 0.32197386026382446, "learning_rate": 1.202096792024976e-07, "loss": 0.3191, "step": 6044 }, { "epoch": 2.8111920632460086, "grad_norm": 0.3363608717918396, "learning_rate": 1.196206122203647e-07, "loss": 0.3183, "step": 6045 }, { "epoch": 2.8116571074252055, "grad_norm": 0.32255131006240845, "learning_rate": 1.1903297462208085e-07, "loss": 0.3075, "step": 6046 }, { "epoch": 2.8121221516044024, "grad_norm": 0.32174059748649597, "learning_rate": 1.1844676657975673e-07, "loss": 0.3559, "step": 6047 }, { "epoch": 2.8125871957835993, "grad_norm": 0.31685617566108704, "learning_rate": 1.1786198826508277e-07, "loss": 0.2865, "step": 6048 }, { "epoch": 2.8130522399627966, "grad_norm": 0.3263781666755676, "learning_rate": 1.1727863984933086e-07, "loss": 0.313, "step": 6049 }, { "epoch": 2.8135172841419935, "grad_norm": 0.3233252763748169, "learning_rate": 1.1669672150335487e-07, "loss": 0.3147, "step": 6050 }, { "epoch": 2.8139823283211904, "grad_norm": 0.32005932927131653, "learning_rate": 1.1611623339758904e-07, "loss": 0.3125, "step": 6051 }, { "epoch": 2.8144473725003873, "grad_norm": 0.33904585242271423, "learning_rate": 1.1553717570204847e-07, "loss": 0.301, "step": 6052 }, { "epoch": 2.8149124166795847, "grad_norm": 0.33245572447776794, "learning_rate": 1.149595485863303e-07, "loss": 0.3283, "step": 6053 }, { "epoch": 2.8153774608587816, "grad_norm": 0.3273317515850067, "learning_rate": 1.1438335221961195e-07, "loss": 0.3298, "step": 6054 }, { "epoch": 2.8158425050379785, "grad_norm": 0.31557828187942505, "learning_rate": 1.1380858677065177e-07, "loss": 0.3146, "step": 6055 }, { "epoch": 2.816307549217176, "grad_norm": 0.29916810989379883, "learning_rate": 1.1323525240778954e-07, "loss": 0.3233, "step": 6056 }, { "epoch": 2.8167725933963728, "grad_norm": 0.3167668282985687, "learning_rate": 1.1266334929894485e-07, "loss": 0.3322, "step": 6057 }, { "epoch": 2.8172376375755697, "grad_norm": 0.2994489371776581, "learning_rate": 1.120928776116198e-07, "loss": 0.3082, "step": 6058 }, { "epoch": 2.8177026817547666, "grad_norm": 0.31627991795539856, "learning_rate": 1.1152383751289575e-07, "loss": 0.3136, "step": 6059 }, { "epoch": 2.8181677259339635, "grad_norm": 0.3162107467651367, "learning_rate": 1.1095622916943494e-07, "loss": 0.3221, "step": 6060 }, { "epoch": 2.818632770113161, "grad_norm": 0.31993138790130615, "learning_rate": 1.103900527474816e-07, "loss": 0.3206, "step": 6061 }, { "epoch": 2.8190978142923577, "grad_norm": 0.3255464732646942, "learning_rate": 1.0982530841285921e-07, "loss": 0.3131, "step": 6062 }, { "epoch": 2.8195628584715546, "grad_norm": 0.317570298910141, "learning_rate": 1.0926199633097156e-07, "loss": 0.2974, "step": 6063 }, { "epoch": 2.820027902650752, "grad_norm": 0.3149189352989197, "learning_rate": 1.0870011666680502e-07, "loss": 0.2871, "step": 6064 }, { "epoch": 2.820492946829949, "grad_norm": 0.3116709589958191, "learning_rate": 1.081396695849235e-07, "loss": 0.3331, "step": 6065 }, { "epoch": 2.820957991009146, "grad_norm": 0.3222356140613556, "learning_rate": 1.0758065524947403e-07, "loss": 0.328, "step": 6066 }, { "epoch": 2.821423035188343, "grad_norm": 0.2908288538455963, "learning_rate": 1.0702307382418175e-07, "loss": 0.2997, "step": 6067 }, { "epoch": 2.82188807936754, "grad_norm": 0.3209805190563202, "learning_rate": 1.0646692547235437e-07, "loss": 0.3317, "step": 6068 }, { "epoch": 2.822353123546737, "grad_norm": 0.3027243912220001, "learning_rate": 1.0591221035687716e-07, "loss": 0.3113, "step": 6069 }, { "epoch": 2.822818167725934, "grad_norm": 0.30290907621383667, "learning_rate": 1.0535892864021901e-07, "loss": 0.3267, "step": 6070 }, { "epoch": 2.8232832119051308, "grad_norm": 0.3006175756454468, "learning_rate": 1.0480708048442589e-07, "loss": 0.3119, "step": 6071 }, { "epoch": 2.823748256084328, "grad_norm": 0.34534621238708496, "learning_rate": 1.0425666605112516e-07, "loss": 0.3862, "step": 6072 }, { "epoch": 2.824213300263525, "grad_norm": 0.317735493183136, "learning_rate": 1.0370768550152454e-07, "loss": 0.3082, "step": 6073 }, { "epoch": 2.824678344442722, "grad_norm": 0.36195850372314453, "learning_rate": 1.0316013899641264e-07, "loss": 0.3223, "step": 6074 }, { "epoch": 2.8251433886219193, "grad_norm": 0.3154940903186798, "learning_rate": 1.0261402669615505e-07, "loss": 0.3041, "step": 6075 }, { "epoch": 2.825608432801116, "grad_norm": 0.33876118063926697, "learning_rate": 1.0206934876070052e-07, "loss": 0.3145, "step": 6076 }, { "epoch": 2.826073476980313, "grad_norm": 0.3257219195365906, "learning_rate": 1.015261053495753e-07, "loss": 0.3304, "step": 6077 }, { "epoch": 2.8265385211595104, "grad_norm": 0.30201414227485657, "learning_rate": 1.0098429662188769e-07, "loss": 0.3044, "step": 6078 }, { "epoch": 2.8270035653387073, "grad_norm": 0.31688815355300903, "learning_rate": 1.0044392273632354e-07, "loss": 0.3012, "step": 6079 }, { "epoch": 2.8274686095179042, "grad_norm": 0.32895052433013916, "learning_rate": 9.990498385115066e-08, "loss": 0.3256, "step": 6080 }, { "epoch": 2.827933653697101, "grad_norm": 0.32152754068374634, "learning_rate": 9.936748012421504e-08, "loss": 0.3223, "step": 6081 }, { "epoch": 2.828398697876298, "grad_norm": 0.31743600964546204, "learning_rate": 9.883141171294242e-08, "loss": 0.2809, "step": 6082 }, { "epoch": 2.8288637420554954, "grad_norm": 0.3381618857383728, "learning_rate": 9.829677877433886e-08, "loss": 0.3185, "step": 6083 }, { "epoch": 2.8293287862346923, "grad_norm": 0.32062768936157227, "learning_rate": 9.776358146498966e-08, "loss": 0.3108, "step": 6084 }, { "epoch": 2.829793830413889, "grad_norm": 0.30004435777664185, "learning_rate": 9.72318199410599e-08, "loss": 0.3067, "step": 6085 }, { "epoch": 2.8302588745930866, "grad_norm": 0.3237650990486145, "learning_rate": 9.670149435829334e-08, "loss": 0.3609, "step": 6086 }, { "epoch": 2.8307239187722835, "grad_norm": 0.3168402314186096, "learning_rate": 9.617260487201407e-08, "loss": 0.3113, "step": 6087 }, { "epoch": 2.8311889629514804, "grad_norm": 0.3079497218132019, "learning_rate": 9.564515163712595e-08, "loss": 0.2955, "step": 6088 }, { "epoch": 2.8316540071306773, "grad_norm": 0.3398304581642151, "learning_rate": 9.511913480810985e-08, "loss": 0.3315, "step": 6089 }, { "epoch": 2.832119051309874, "grad_norm": 0.35483306646347046, "learning_rate": 9.459455453902866e-08, "loss": 0.3551, "step": 6090 }, { "epoch": 2.8325840954890715, "grad_norm": 0.34219107031822205, "learning_rate": 9.407141098352335e-08, "loss": 0.3129, "step": 6091 }, { "epoch": 2.8330491396682684, "grad_norm": 0.29722991585731506, "learning_rate": 9.354970429481413e-08, "loss": 0.2773, "step": 6092 }, { "epoch": 2.8335141838474653, "grad_norm": 0.31382736563682556, "learning_rate": 9.302943462569991e-08, "loss": 0.3483, "step": 6093 }, { "epoch": 2.8339792280266627, "grad_norm": 0.3087048828601837, "learning_rate": 9.25106021285599e-08, "loss": 0.2727, "step": 6094 }, { "epoch": 2.8344442722058596, "grad_norm": 0.3467859923839569, "learning_rate": 9.199320695535086e-08, "loss": 0.3607, "step": 6095 }, { "epoch": 2.8349093163850565, "grad_norm": 0.33929920196533203, "learning_rate": 9.147724925760993e-08, "loss": 0.2912, "step": 6096 }, { "epoch": 2.835374360564254, "grad_norm": 0.31342393159866333, "learning_rate": 9.096272918645343e-08, "loss": 0.2958, "step": 6097 }, { "epoch": 2.8358394047434508, "grad_norm": 0.31853240728378296, "learning_rate": 9.044964689257474e-08, "loss": 0.3308, "step": 6098 }, { "epoch": 2.8363044489226477, "grad_norm": 0.32699882984161377, "learning_rate": 8.993800252624863e-08, "loss": 0.2928, "step": 6099 }, { "epoch": 2.8367694931018446, "grad_norm": 0.2974814474582672, "learning_rate": 8.942779623732578e-08, "loss": 0.2999, "step": 6100 }, { "epoch": 2.8372345372810415, "grad_norm": 0.32415345311164856, "learning_rate": 8.89190281752389e-08, "loss": 0.326, "step": 6101 }, { "epoch": 2.837699581460239, "grad_norm": 0.31111282110214233, "learning_rate": 8.841169848899711e-08, "loss": 0.3196, "step": 6102 }, { "epoch": 2.8381646256394357, "grad_norm": 0.3355112671852112, "learning_rate": 8.790580732718934e-08, "loss": 0.3027, "step": 6103 }, { "epoch": 2.8386296698186326, "grad_norm": 0.33080142736434937, "learning_rate": 8.740135483798207e-08, "loss": 0.3272, "step": 6104 }, { "epoch": 2.83909471399783, "grad_norm": 0.3315957486629486, "learning_rate": 8.68983411691221e-08, "loss": 0.3239, "step": 6105 }, { "epoch": 2.839559758177027, "grad_norm": 0.3323264420032501, "learning_rate": 8.639676646793382e-08, "loss": 0.3448, "step": 6106 }, { "epoch": 2.840024802356224, "grad_norm": 0.29447272419929504, "learning_rate": 8.589663088131972e-08, "loss": 0.2987, "step": 6107 }, { "epoch": 2.840489846535421, "grad_norm": 0.3482167422771454, "learning_rate": 8.539793455576207e-08, "loss": 0.3381, "step": 6108 }, { "epoch": 2.840954890714618, "grad_norm": 0.29525330662727356, "learning_rate": 8.490067763732124e-08, "loss": 0.3098, "step": 6109 }, { "epoch": 2.841419934893815, "grad_norm": 0.3327022194862366, "learning_rate": 8.44048602716352e-08, "loss": 0.3471, "step": 6110 }, { "epoch": 2.841884979073012, "grad_norm": 0.3110980987548828, "learning_rate": 8.391048260392054e-08, "loss": 0.3353, "step": 6111 }, { "epoch": 2.8423500232522088, "grad_norm": 0.31810250878334045, "learning_rate": 8.341754477897257e-08, "loss": 0.3223, "step": 6112 }, { "epoch": 2.842815067431406, "grad_norm": 0.3068527281284332, "learning_rate": 8.292604694116523e-08, "loss": 0.3382, "step": 6113 }, { "epoch": 2.843280111610603, "grad_norm": 0.32268133759498596, "learning_rate": 8.24359892344495e-08, "loss": 0.3259, "step": 6114 }, { "epoch": 2.8437451557898, "grad_norm": 0.31271931529045105, "learning_rate": 8.194737180235668e-08, "loss": 0.3327, "step": 6115 }, { "epoch": 2.8442101999689973, "grad_norm": 0.31981274485588074, "learning_rate": 8.146019478799282e-08, "loss": 0.2942, "step": 6116 }, { "epoch": 2.844675244148194, "grad_norm": 0.3350343406200409, "learning_rate": 8.097445833404605e-08, "loss": 0.3287, "step": 6117 }, { "epoch": 2.845140288327391, "grad_norm": 0.316834419965744, "learning_rate": 8.049016258277976e-08, "loss": 0.2982, "step": 6118 }, { "epoch": 2.845605332506588, "grad_norm": 0.3442407548427582, "learning_rate": 8.000730767603604e-08, "loss": 0.342, "step": 6119 }, { "epoch": 2.846070376685785, "grad_norm": 0.3273371160030365, "learning_rate": 7.952589375523567e-08, "loss": 0.346, "step": 6120 }, { "epoch": 2.8465354208649822, "grad_norm": 0.29861021041870117, "learning_rate": 7.904592096137753e-08, "loss": 0.3062, "step": 6121 }, { "epoch": 2.847000465044179, "grad_norm": 0.3003145158290863, "learning_rate": 7.856738943503694e-08, "loss": 0.3057, "step": 6122 }, { "epoch": 2.847465509223376, "grad_norm": 0.2993358373641968, "learning_rate": 7.809029931636902e-08, "loss": 0.3087, "step": 6123 }, { "epoch": 2.8479305534025734, "grad_norm": 0.31024885177612305, "learning_rate": 7.761465074510422e-08, "loss": 0.3314, "step": 6124 }, { "epoch": 2.8483955975817703, "grad_norm": 0.31834203004837036, "learning_rate": 7.714044386055386e-08, "loss": 0.3016, "step": 6125 }, { "epoch": 2.848860641760967, "grad_norm": 0.3217991888523102, "learning_rate": 7.666767880160464e-08, "loss": 0.3448, "step": 6126 }, { "epoch": 2.8493256859401646, "grad_norm": 0.29849451780319214, "learning_rate": 7.619635570672135e-08, "loss": 0.3216, "step": 6127 }, { "epoch": 2.8497907301193615, "grad_norm": 0.31368592381477356, "learning_rate": 7.5726474713948e-08, "loss": 0.303, "step": 6128 }, { "epoch": 2.8502557742985584, "grad_norm": 0.295265257358551, "learning_rate": 7.525803596090397e-08, "loss": 0.3073, "step": 6129 }, { "epoch": 2.8507208184777553, "grad_norm": 0.31519418954849243, "learning_rate": 7.479103958478783e-08, "loss": 0.3233, "step": 6130 }, { "epoch": 2.851185862656952, "grad_norm": 0.3676014840602875, "learning_rate": 7.432548572237519e-08, "loss": 0.3799, "step": 6131 }, { "epoch": 2.8516509068361495, "grad_norm": 0.3276804983615875, "learning_rate": 7.386137451001974e-08, "loss": 0.3389, "step": 6132 }, { "epoch": 2.8521159510153464, "grad_norm": 0.3015483319759369, "learning_rate": 7.339870608365107e-08, "loss": 0.291, "step": 6133 }, { "epoch": 2.8525809951945433, "grad_norm": 0.3150058090686798, "learning_rate": 7.293748057877859e-08, "loss": 0.3113, "step": 6134 }, { "epoch": 2.8530460393737407, "grad_norm": 0.3133685886859894, "learning_rate": 7.247769813048644e-08, "loss": 0.3141, "step": 6135 }, { "epoch": 2.8535110835529376, "grad_norm": 0.3154557943344116, "learning_rate": 7.201935887343858e-08, "loss": 0.3067, "step": 6136 }, { "epoch": 2.8539761277321345, "grad_norm": 0.32975584268569946, "learning_rate": 7.156246294187374e-08, "loss": 0.3272, "step": 6137 }, { "epoch": 2.854441171911332, "grad_norm": 0.31270360946655273, "learning_rate": 7.110701046961044e-08, "loss": 0.3105, "step": 6138 }, { "epoch": 2.8549062160905287, "grad_norm": 0.3216429650783539, "learning_rate": 7.065300159004307e-08, "loss": 0.299, "step": 6139 }, { "epoch": 2.8553712602697257, "grad_norm": 0.33457085490226746, "learning_rate": 7.02004364361436e-08, "loss": 0.3355, "step": 6140 }, { "epoch": 2.8558363044489226, "grad_norm": 0.31512632966041565, "learning_rate": 6.974931514046046e-08, "loss": 0.3328, "step": 6141 }, { "epoch": 2.8563013486281195, "grad_norm": 0.3386141359806061, "learning_rate": 6.929963783511961e-08, "loss": 0.3283, "step": 6142 }, { "epoch": 2.856766392807317, "grad_norm": 0.30703625082969666, "learning_rate": 6.885140465182516e-08, "loss": 0.3204, "step": 6143 }, { "epoch": 2.8572314369865137, "grad_norm": 0.31355178356170654, "learning_rate": 6.840461572185708e-08, "loss": 0.2829, "step": 6144 }, { "epoch": 2.8576964811657106, "grad_norm": 0.36136549711227417, "learning_rate": 6.795927117607238e-08, "loss": 0.3508, "step": 6145 }, { "epoch": 2.858161525344908, "grad_norm": 0.30663174390792847, "learning_rate": 6.751537114490503e-08, "loss": 0.3015, "step": 6146 }, { "epoch": 2.858626569524105, "grad_norm": 0.35951730608940125, "learning_rate": 6.707291575836661e-08, "loss": 0.3571, "step": 6147 }, { "epoch": 2.859091613703302, "grad_norm": 0.3195739984512329, "learning_rate": 6.663190514604456e-08, "loss": 0.304, "step": 6148 }, { "epoch": 2.8595566578824987, "grad_norm": 0.32330840826034546, "learning_rate": 6.61923394371039e-08, "loss": 0.314, "step": 6149 }, { "epoch": 2.8600217020616956, "grad_norm": 0.30728137493133545, "learning_rate": 6.575421876028721e-08, "loss": 0.3342, "step": 6150 }, { "epoch": 2.860486746240893, "grad_norm": 0.2898307740688324, "learning_rate": 6.531754324391126e-08, "loss": 0.3215, "step": 6151 }, { "epoch": 2.86095179042009, "grad_norm": 0.308747798204422, "learning_rate": 6.488231301587266e-08, "loss": 0.3281, "step": 6152 }, { "epoch": 2.8614168345992868, "grad_norm": 0.32874009013175964, "learning_rate": 6.444852820364222e-08, "loss": 0.3273, "step": 6153 }, { "epoch": 2.861881878778484, "grad_norm": 0.3160831332206726, "learning_rate": 6.401618893426886e-08, "loss": 0.3156, "step": 6154 }, { "epoch": 2.862346922957681, "grad_norm": 0.3409363031387329, "learning_rate": 6.358529533437796e-08, "loss": 0.3194, "step": 6155 }, { "epoch": 2.862811967136878, "grad_norm": 0.29154202342033386, "learning_rate": 6.315584753017134e-08, "loss": 0.3113, "step": 6156 }, { "epoch": 2.8632770113160753, "grad_norm": 0.3012997508049011, "learning_rate": 6.272784564742673e-08, "loss": 0.3508, "step": 6157 }, { "epoch": 2.863742055495272, "grad_norm": 0.3225801885128021, "learning_rate": 6.230128981149941e-08, "loss": 0.3195, "step": 6158 }, { "epoch": 2.864207099674469, "grad_norm": 0.30362215638160706, "learning_rate": 6.187618014732056e-08, "loss": 0.3027, "step": 6159 }, { "epoch": 2.864672143853666, "grad_norm": 0.32935047149658203, "learning_rate": 6.145251677939778e-08, "loss": 0.3351, "step": 6160 }, { "epoch": 2.865137188032863, "grad_norm": 0.3463619649410248, "learning_rate": 6.103029983181519e-08, "loss": 0.3409, "step": 6161 }, { "epoch": 2.8656022322120602, "grad_norm": 0.29759082198143005, "learning_rate": 6.060952942823328e-08, "loss": 0.3043, "step": 6162 }, { "epoch": 2.866067276391257, "grad_norm": 0.3281930685043335, "learning_rate": 6.01902056918896e-08, "loss": 0.3291, "step": 6163 }, { "epoch": 2.866532320570454, "grad_norm": 0.3233920931816101, "learning_rate": 5.977232874559535e-08, "loss": 0.2848, "step": 6164 }, { "epoch": 2.8669973647496514, "grad_norm": 0.31291288137435913, "learning_rate": 5.935589871174208e-08, "loss": 0.341, "step": 6165 }, { "epoch": 2.8674624089288483, "grad_norm": 0.3225501775741577, "learning_rate": 5.8940915712293875e-08, "loss": 0.3351, "step": 6166 }, { "epoch": 2.867927453108045, "grad_norm": 0.33493563532829285, "learning_rate": 5.8527379868792976e-08, "loss": 0.3184, "step": 6167 }, { "epoch": 2.8683924972872425, "grad_norm": 0.32679054141044617, "learning_rate": 5.811529130235749e-08, "loss": 0.2963, "step": 6168 }, { "epoch": 2.8688575414664395, "grad_norm": 0.319558709859848, "learning_rate": 5.770465013368198e-08, "loss": 0.33, "step": 6169 }, { "epoch": 2.8693225856456364, "grad_norm": 0.28156691789627075, "learning_rate": 5.729545648303525e-08, "loss": 0.2726, "step": 6170 }, { "epoch": 2.8697876298248333, "grad_norm": 0.3368379771709442, "learning_rate": 5.688771047026476e-08, "loss": 0.3513, "step": 6171 }, { "epoch": 2.87025267400403, "grad_norm": 0.32251864671707153, "learning_rate": 5.648141221479164e-08, "loss": 0.3177, "step": 6172 }, { "epoch": 2.8707177181832275, "grad_norm": 0.3322902321815491, "learning_rate": 5.6076561835615164e-08, "loss": 0.3423, "step": 6173 }, { "epoch": 2.8711827623624244, "grad_norm": 0.3071969449520111, "learning_rate": 5.5673159451308246e-08, "loss": 0.3236, "step": 6174 }, { "epoch": 2.8716478065416213, "grad_norm": 0.29615259170532227, "learning_rate": 5.527120518002138e-08, "loss": 0.2841, "step": 6175 }, { "epoch": 2.8721128507208187, "grad_norm": 0.32115840911865234, "learning_rate": 5.487069913948096e-08, "loss": 0.366, "step": 6176 }, { "epoch": 2.8725778949000156, "grad_norm": 0.31044548749923706, "learning_rate": 5.447164144698758e-08, "loss": 0.3026, "step": 6177 }, { "epoch": 2.8730429390792125, "grad_norm": 0.3024675250053406, "learning_rate": 5.407403221941998e-08, "loss": 0.3232, "step": 6178 }, { "epoch": 2.8735079832584094, "grad_norm": 0.3414202034473419, "learning_rate": 5.367787157323057e-08, "loss": 0.3284, "step": 6179 }, { "epoch": 2.8739730274376063, "grad_norm": 0.3175242841243744, "learning_rate": 5.3283159624448745e-08, "loss": 0.2636, "step": 6180 }, { "epoch": 2.8744380716168036, "grad_norm": 0.490583211183548, "learning_rate": 5.2889896488679816e-08, "loss": 0.3264, "step": 6181 }, { "epoch": 2.8749031157960006, "grad_norm": 0.32635655999183655, "learning_rate": 5.249808228110276e-08, "loss": 0.3085, "step": 6182 }, { "epoch": 2.8753681599751975, "grad_norm": 0.32171571254730225, "learning_rate": 5.2107717116474665e-08, "loss": 0.3289, "step": 6183 }, { "epoch": 2.875833204154395, "grad_norm": 0.30658993124961853, "learning_rate": 5.171880110912686e-08, "loss": 0.2749, "step": 6184 }, { "epoch": 2.8762982483335917, "grad_norm": 0.3477241098880768, "learning_rate": 5.133133437296656e-08, "loss": 0.3022, "step": 6185 }, { "epoch": 2.8767632925127886, "grad_norm": 0.31438449025154114, "learning_rate": 5.094531702147632e-08, "loss": 0.2979, "step": 6186 }, { "epoch": 2.877228336691986, "grad_norm": 0.32434505224227905, "learning_rate": 5.056074916771458e-08, "loss": 0.2972, "step": 6187 }, { "epoch": 2.877693380871183, "grad_norm": 0.33703556656837463, "learning_rate": 5.01776309243146e-08, "loss": 0.3343, "step": 6188 }, { "epoch": 2.8781584250503798, "grad_norm": 0.2929964065551758, "learning_rate": 4.97959624034855e-08, "loss": 0.3, "step": 6189 }, { "epoch": 2.8786234692295767, "grad_norm": 0.3615955114364624, "learning_rate": 4.9415743717012296e-08, "loss": 0.3695, "step": 6190 }, { "epoch": 2.8790885134087736, "grad_norm": 0.33232712745666504, "learning_rate": 4.903697497625537e-08, "loss": 0.2803, "step": 6191 }, { "epoch": 2.879553557587971, "grad_norm": 0.32238370180130005, "learning_rate": 4.865965629214819e-08, "loss": 0.342, "step": 6192 }, { "epoch": 2.880018601767168, "grad_norm": 0.29459741711616516, "learning_rate": 4.828378777520293e-08, "loss": 0.3223, "step": 6193 }, { "epoch": 2.8804836459463647, "grad_norm": 0.3244211971759796, "learning_rate": 4.790936953550485e-08, "loss": 0.3027, "step": 6194 }, { "epoch": 2.880948690125562, "grad_norm": 0.33812856674194336, "learning_rate": 4.753640168271456e-08, "loss": 0.3327, "step": 6195 }, { "epoch": 2.881413734304759, "grad_norm": 0.3344283998012543, "learning_rate": 4.7164884326068584e-08, "loss": 0.3385, "step": 6196 }, { "epoch": 2.881878778483956, "grad_norm": 0.332630455493927, "learning_rate": 4.6794817574378204e-08, "loss": 0.3118, "step": 6197 }, { "epoch": 2.8823438226631533, "grad_norm": 0.33553823828697205, "learning_rate": 4.6426201536030616e-08, "loss": 0.3352, "step": 6198 }, { "epoch": 2.88280886684235, "grad_norm": 0.32330450415611267, "learning_rate": 4.605903631898612e-08, "loss": 0.3162, "step": 6199 }, { "epoch": 2.883273911021547, "grad_norm": 0.32343700528144836, "learning_rate": 4.569332203078258e-08, "loss": 0.3222, "step": 6200 }, { "epoch": 2.883738955200744, "grad_norm": 0.310979425907135, "learning_rate": 4.5329058778531e-08, "loss": 0.2865, "step": 6201 }, { "epoch": 2.884203999379941, "grad_norm": 0.3251758813858032, "learning_rate": 4.4966246668919355e-08, "loss": 0.3159, "step": 6202 }, { "epoch": 2.8846690435591382, "grad_norm": 0.3213922381401062, "learning_rate": 4.460488580820821e-08, "loss": 0.3045, "step": 6203 }, { "epoch": 2.885134087738335, "grad_norm": 0.3345593512058258, "learning_rate": 4.424497630223512e-08, "loss": 0.3395, "step": 6204 }, { "epoch": 2.885599131917532, "grad_norm": 0.3209022879600525, "learning_rate": 4.3886518256411325e-08, "loss": 0.3152, "step": 6205 }, { "epoch": 2.8860641760967294, "grad_norm": 0.31403934955596924, "learning_rate": 4.35295117757234e-08, "loss": 0.3292, "step": 6206 }, { "epoch": 2.8865292202759263, "grad_norm": 0.289949893951416, "learning_rate": 4.3173956964732145e-08, "loss": 0.304, "step": 6207 }, { "epoch": 2.886994264455123, "grad_norm": 0.33771443367004395, "learning_rate": 4.281985392757537e-08, "loss": 0.3385, "step": 6208 }, { "epoch": 2.88745930863432, "grad_norm": 0.2989499866962433, "learning_rate": 4.2467202767962346e-08, "loss": 0.2799, "step": 6209 }, { "epoch": 2.8879243528135174, "grad_norm": 0.318925142288208, "learning_rate": 4.211600358917989e-08, "loss": 0.3248, "step": 6210 }, { "epoch": 2.8883893969927144, "grad_norm": 0.31016743183135986, "learning_rate": 4.17662564940885e-08, "loss": 0.3164, "step": 6211 }, { "epoch": 2.8888544411719113, "grad_norm": 0.3462943732738495, "learning_rate": 4.14179615851229e-08, "loss": 0.3617, "step": 6212 }, { "epoch": 2.889319485351108, "grad_norm": 0.3116661310195923, "learning_rate": 4.1071118964293166e-08, "loss": 0.3037, "step": 6213 }, { "epoch": 2.8897845295303055, "grad_norm": 0.29562506079673767, "learning_rate": 4.07257287331847e-08, "loss": 0.3133, "step": 6214 }, { "epoch": 2.8902495737095024, "grad_norm": 0.3385922610759735, "learning_rate": 4.038179099295547e-08, "loss": 0.3416, "step": 6215 }, { "epoch": 2.8907146178886993, "grad_norm": 0.28111937642097473, "learning_rate": 4.0039305844339905e-08, "loss": 0.3165, "step": 6216 }, { "epoch": 2.8911796620678967, "grad_norm": 0.3083990216255188, "learning_rate": 3.969827338764665e-08, "loss": 0.3474, "step": 6217 }, { "epoch": 2.8916447062470936, "grad_norm": 0.3193405270576477, "learning_rate": 3.935869372275747e-08, "loss": 0.3144, "step": 6218 }, { "epoch": 2.8921097504262905, "grad_norm": 0.3196331858634949, "learning_rate": 3.9020566949131145e-08, "loss": 0.3228, "step": 6219 }, { "epoch": 2.8925747946054874, "grad_norm": 0.31240054965019226, "learning_rate": 3.868389316579846e-08, "loss": 0.3234, "step": 6220 }, { "epoch": 2.8930398387846843, "grad_norm": 0.2907924950122833, "learning_rate": 3.834867247136553e-08, "loss": 0.3052, "step": 6221 }, { "epoch": 2.8935048829638816, "grad_norm": 0.3368573784828186, "learning_rate": 3.801490496401439e-08, "loss": 0.3779, "step": 6222 }, { "epoch": 2.8939699271430785, "grad_norm": 0.3105674088001251, "learning_rate": 3.768259074149905e-08, "loss": 0.3045, "step": 6223 }, { "epoch": 2.8944349713222755, "grad_norm": 0.31339845061302185, "learning_rate": 3.735172990114888e-08, "loss": 0.3456, "step": 6224 }, { "epoch": 2.894900015501473, "grad_norm": 0.32460853457450867, "learning_rate": 3.702232253986804e-08, "loss": 0.3115, "step": 6225 }, { "epoch": 2.8953650596806697, "grad_norm": 0.33056336641311646, "learning_rate": 3.6694368754134346e-08, "loss": 0.3283, "step": 6226 }, { "epoch": 2.8958301038598666, "grad_norm": 0.31713178753852844, "learning_rate": 3.6367868640000416e-08, "loss": 0.3124, "step": 6227 }, { "epoch": 2.896295148039064, "grad_norm": 0.3436635732650757, "learning_rate": 3.6042822293093083e-08, "loss": 0.3683, "step": 6228 }, { "epoch": 2.896760192218261, "grad_norm": 0.30953043699264526, "learning_rate": 3.571922980861231e-08, "loss": 0.3073, "step": 6229 }, { "epoch": 2.8972252363974578, "grad_norm": 0.32656288146972656, "learning_rate": 3.539709128133395e-08, "loss": 0.2919, "step": 6230 }, { "epoch": 2.8976902805766547, "grad_norm": 0.31821176409721375, "learning_rate": 3.5076406805606425e-08, "loss": 0.3195, "step": 6231 }, { "epoch": 2.8981553247558516, "grad_norm": 0.31194084882736206, "learning_rate": 3.475717647535348e-08, "loss": 0.3455, "step": 6232 }, { "epoch": 2.898620368935049, "grad_norm": 0.31423068046569824, "learning_rate": 3.443940038407256e-08, "loss": 0.3265, "step": 6233 }, { "epoch": 2.899085413114246, "grad_norm": 0.2951379418373108, "learning_rate": 3.4123078624834214e-08, "loss": 0.2827, "step": 6234 }, { "epoch": 2.8995504572934427, "grad_norm": 0.3533174395561218, "learning_rate": 3.3808211290284886e-08, "loss": 0.3611, "step": 6235 }, { "epoch": 2.90001550147264, "grad_norm": 0.3347010612487793, "learning_rate": 3.349479847264414e-08, "loss": 0.3355, "step": 6236 }, { "epoch": 2.900480545651837, "grad_norm": 0.3282628059387207, "learning_rate": 3.318284026370522e-08, "loss": 0.3093, "step": 6237 }, { "epoch": 2.900945589831034, "grad_norm": 0.31977033615112305, "learning_rate": 3.287233675483503e-08, "loss": 0.328, "step": 6238 }, { "epoch": 2.9014106340102312, "grad_norm": 0.3105235993862152, "learning_rate": 3.2563288036976394e-08, "loss": 0.3117, "step": 6239 }, { "epoch": 2.901875678189428, "grad_norm": 0.3219479024410248, "learning_rate": 3.2255694200643003e-08, "loss": 0.3167, "step": 6240 }, { "epoch": 2.902340722368625, "grad_norm": 0.30277541279792786, "learning_rate": 3.194955533592559e-08, "loss": 0.2801, "step": 6241 }, { "epoch": 2.902805766547822, "grad_norm": 0.3230368196964264, "learning_rate": 3.16448715324863e-08, "loss": 0.3138, "step": 6242 }, { "epoch": 2.903270810727019, "grad_norm": 0.3114720582962036, "learning_rate": 3.13416428795621e-08, "loss": 0.3266, "step": 6243 }, { "epoch": 2.903735854906216, "grad_norm": 0.324063777923584, "learning_rate": 3.103986946596415e-08, "loss": 0.3465, "step": 6244 }, { "epoch": 2.904200899085413, "grad_norm": 0.318876177072525, "learning_rate": 3.073955138007734e-08, "loss": 0.3217, "step": 6245 }, { "epoch": 2.90466594326461, "grad_norm": 0.3103938698768616, "learning_rate": 3.044068870985906e-08, "loss": 0.3207, "step": 6246 }, { "epoch": 2.9051309874438074, "grad_norm": 0.31359586119651794, "learning_rate": 3.014328154284152e-08, "loss": 0.3173, "step": 6247 }, { "epoch": 2.9055960316230043, "grad_norm": 0.311901330947876, "learning_rate": 2.98473299661306e-08, "loss": 0.3194, "step": 6248 }, { "epoch": 2.906061075802201, "grad_norm": 0.31329211592674255, "learning_rate": 2.955283406640641e-08, "loss": 0.2925, "step": 6249 }, { "epoch": 2.906526119981398, "grad_norm": 0.31857290863990784, "learning_rate": 2.9259793929921066e-08, "loss": 0.3267, "step": 6250 }, { "epoch": 2.906991164160595, "grad_norm": 0.3248409628868103, "learning_rate": 2.8968209642501465e-08, "loss": 0.323, "step": 6251 }, { "epoch": 2.9074562083397923, "grad_norm": 0.3088364005088806, "learning_rate": 2.8678081289548187e-08, "loss": 0.284, "step": 6252 }, { "epoch": 2.9079212525189893, "grad_norm": 0.3107573091983795, "learning_rate": 2.8389408956034925e-08, "loss": 0.3242, "step": 6253 }, { "epoch": 2.908386296698186, "grad_norm": 0.3310908079147339, "learning_rate": 2.810219272650849e-08, "loss": 0.3107, "step": 6254 }, { "epoch": 2.9088513408773835, "grad_norm": 0.30595463514328003, "learning_rate": 2.7816432685091598e-08, "loss": 0.3522, "step": 6255 }, { "epoch": 2.9093163850565804, "grad_norm": 0.3248220980167389, "learning_rate": 2.7532128915476742e-08, "loss": 0.3116, "step": 6256 }, { "epoch": 2.9097814292357773, "grad_norm": 0.3430537283420563, "learning_rate": 2.7249281500932868e-08, "loss": 0.3414, "step": 6257 }, { "epoch": 2.9102464734149747, "grad_norm": 0.32663294672966003, "learning_rate": 2.6967890524301488e-08, "loss": 0.2961, "step": 6258 }, { "epoch": 2.9107115175941716, "grad_norm": 0.32773134112358093, "learning_rate": 2.6687956067997234e-08, "loss": 0.2928, "step": 6259 }, { "epoch": 2.9111765617733685, "grad_norm": 0.3033883571624756, "learning_rate": 2.640947821400841e-08, "loss": 0.3062, "step": 6260 }, { "epoch": 2.9116416059525654, "grad_norm": 0.33305469155311584, "learning_rate": 2.6132457043896442e-08, "loss": 0.2965, "step": 6261 }, { "epoch": 2.9121066501317623, "grad_norm": 0.33847376704216003, "learning_rate": 2.585689263879643e-08, "loss": 0.3387, "step": 6262 }, { "epoch": 2.9125716943109596, "grad_norm": 0.3103272616863251, "learning_rate": 2.558278507941714e-08, "loss": 0.2881, "step": 6263 }, { "epoch": 2.9130367384901565, "grad_norm": 0.33586204051971436, "learning_rate": 2.5310134446039357e-08, "loss": 0.3428, "step": 6264 }, { "epoch": 2.9135017826693534, "grad_norm": 0.34026390314102173, "learning_rate": 2.503894081851921e-08, "loss": 0.3164, "step": 6265 }, { "epoch": 2.913966826848551, "grad_norm": 0.3134699761867523, "learning_rate": 2.476920427628371e-08, "loss": 0.3022, "step": 6266 }, { "epoch": 2.9144318710277477, "grad_norm": 0.31349417567253113, "learning_rate": 2.4500924898335223e-08, "loss": 0.3335, "step": 6267 }, { "epoch": 2.9148969152069446, "grad_norm": 0.30703291296958923, "learning_rate": 2.4234102763247558e-08, "loss": 0.3165, "step": 6268 }, { "epoch": 2.915361959386142, "grad_norm": 0.30995601415634155, "learning_rate": 2.3968737949169318e-08, "loss": 0.3276, "step": 6269 }, { "epoch": 2.915827003565339, "grad_norm": 0.3151072561740875, "learning_rate": 2.370483053382111e-08, "loss": 0.3182, "step": 6270 }, { "epoch": 2.9162920477445358, "grad_norm": 0.3360000252723694, "learning_rate": 2.3442380594497215e-08, "loss": 0.3073, "step": 6271 }, { "epoch": 2.9167570919237327, "grad_norm": 0.3323414921760559, "learning_rate": 2.3181388208065036e-08, "loss": 0.3643, "step": 6272 }, { "epoch": 2.9172221361029296, "grad_norm": 0.3142452538013458, "learning_rate": 2.2921853450965094e-08, "loss": 0.2902, "step": 6273 }, { "epoch": 2.917687180282127, "grad_norm": 0.332619309425354, "learning_rate": 2.2663776399211024e-08, "loss": 0.3332, "step": 6274 }, { "epoch": 2.918152224461324, "grad_norm": 0.30116286873817444, "learning_rate": 2.2407157128389033e-08, "loss": 0.2971, "step": 6275 }, { "epoch": 2.9186172686405207, "grad_norm": 0.3235343396663666, "learning_rate": 2.2151995713659e-08, "loss": 0.3239, "step": 6276 }, { "epoch": 2.919082312819718, "grad_norm": 0.3417525887489319, "learning_rate": 2.189829222975337e-08, "loss": 0.3347, "step": 6277 }, { "epoch": 2.919547356998915, "grad_norm": 0.34328693151474, "learning_rate": 2.1646046750978255e-08, "loss": 0.3241, "step": 6278 }, { "epoch": 2.920012401178112, "grad_norm": 0.3048785924911499, "learning_rate": 2.1395259351211227e-08, "loss": 0.2951, "step": 6279 }, { "epoch": 2.920477445357309, "grad_norm": 0.31795376539230347, "learning_rate": 2.1145930103904645e-08, "loss": 0.3376, "step": 6280 }, { "epoch": 2.9209424895365057, "grad_norm": 0.34835532307624817, "learning_rate": 2.0898059082082868e-08, "loss": 0.3394, "step": 6281 }, { "epoch": 2.921407533715703, "grad_norm": 0.30004480481147766, "learning_rate": 2.065164635834338e-08, "loss": 0.2802, "step": 6282 }, { "epoch": 2.9218725778949, "grad_norm": 0.31442791223526, "learning_rate": 2.040669200485623e-08, "loss": 0.3024, "step": 6283 }, { "epoch": 2.922337622074097, "grad_norm": 0.33434808254241943, "learning_rate": 2.016319609336459e-08, "loss": 0.3347, "step": 6284 }, { "epoch": 2.922802666253294, "grad_norm": 0.3095998167991638, "learning_rate": 1.992115869518474e-08, "loss": 0.3192, "step": 6285 }, { "epoch": 2.923267710432491, "grad_norm": 0.2919743061065674, "learning_rate": 1.968057988120553e-08, "loss": 0.3011, "step": 6286 }, { "epoch": 2.923732754611688, "grad_norm": 0.3112308084964752, "learning_rate": 1.9441459721887822e-08, "loss": 0.3284, "step": 6287 }, { "epoch": 2.9241977987908854, "grad_norm": 0.32711583375930786, "learning_rate": 1.920379828726726e-08, "loss": 0.3329, "step": 6288 }, { "epoch": 2.9246628429700823, "grad_norm": 0.30145832896232605, "learning_rate": 1.8967595646949834e-08, "loss": 0.292, "step": 6289 }, { "epoch": 2.925127887149279, "grad_norm": 0.3420089781284332, "learning_rate": 1.8732851870115755e-08, "loss": 0.32, "step": 6290 }, { "epoch": 2.925592931328476, "grad_norm": 0.34311848878860474, "learning_rate": 1.849956702551836e-08, "loss": 0.3236, "step": 6291 }, { "epoch": 2.926057975507673, "grad_norm": 0.31155329942703247, "learning_rate": 1.826774118148189e-08, "loss": 0.3191, "step": 6292 }, { "epoch": 2.9265230196868703, "grad_norm": 0.3236565589904785, "learning_rate": 1.803737440590536e-08, "loss": 0.3369, "step": 6293 }, { "epoch": 2.9269880638660672, "grad_norm": 0.28220975399017334, "learning_rate": 1.7808466766259246e-08, "loss": 0.2988, "step": 6294 }, { "epoch": 2.927453108045264, "grad_norm": 0.3211870789527893, "learning_rate": 1.758101832958603e-08, "loss": 0.3305, "step": 6295 }, { "epoch": 2.9279181522244615, "grad_norm": 0.3059036135673523, "learning_rate": 1.7355029162502978e-08, "loss": 0.2947, "step": 6296 }, { "epoch": 2.9283831964036584, "grad_norm": 0.31441813707351685, "learning_rate": 1.7130499331197703e-08, "loss": 0.2993, "step": 6297 }, { "epoch": 2.9288482405828553, "grad_norm": 0.32152053713798523, "learning_rate": 1.6907428901432045e-08, "loss": 0.3135, "step": 6298 }, { "epoch": 2.9293132847620527, "grad_norm": 0.328326940536499, "learning_rate": 1.668581793853874e-08, "loss": 0.3092, "step": 6299 }, { "epoch": 2.9297783289412496, "grad_norm": 0.3213350772857666, "learning_rate": 1.6465666507425314e-08, "loss": 0.3505, "step": 6300 }, { "epoch": 2.9302433731204465, "grad_norm": 0.3014281988143921, "learning_rate": 1.6246974672569083e-08, "loss": 0.2903, "step": 6301 }, { "epoch": 2.9307084172996434, "grad_norm": 0.31483855843544006, "learning_rate": 1.6029742498022692e-08, "loss": 0.3267, "step": 6302 }, { "epoch": 2.9311734614788403, "grad_norm": 0.32072848081588745, "learning_rate": 1.5813970047409144e-08, "loss": 0.2982, "step": 6303 }, { "epoch": 2.9316385056580376, "grad_norm": 0.3045773208141327, "learning_rate": 1.559965738392455e-08, "loss": 0.3062, "step": 6304 }, { "epoch": 2.9321035498372345, "grad_norm": 0.3604282736778259, "learning_rate": 1.538680457033814e-08, "loss": 0.3295, "step": 6305 }, { "epoch": 2.9325685940164314, "grad_norm": 0.3138650357723236, "learning_rate": 1.5175411668990613e-08, "loss": 0.2937, "step": 6306 }, { "epoch": 2.933033638195629, "grad_norm": 0.330479234457016, "learning_rate": 1.496547874179577e-08, "loss": 0.3091, "step": 6307 }, { "epoch": 2.9334986823748257, "grad_norm": 0.3400230407714844, "learning_rate": 1.475700585023998e-08, "loss": 0.3206, "step": 6308 }, { "epoch": 2.9339637265540226, "grad_norm": 0.33280929923057556, "learning_rate": 1.4549993055380519e-08, "loss": 0.3632, "step": 6309 }, { "epoch": 2.9344287707332195, "grad_norm": 0.29419514536857605, "learning_rate": 1.4344440417848882e-08, "loss": 0.2756, "step": 6310 }, { "epoch": 2.9348938149124164, "grad_norm": 0.3062974214553833, "learning_rate": 1.4140347997848025e-08, "loss": 0.3265, "step": 6311 }, { "epoch": 2.9353588590916138, "grad_norm": 0.3415301740169525, "learning_rate": 1.3937715855152912e-08, "loss": 0.3005, "step": 6312 }, { "epoch": 2.9358239032708107, "grad_norm": 0.3116869032382965, "learning_rate": 1.3736544049111622e-08, "loss": 0.2983, "step": 6313 }, { "epoch": 2.9362889474500076, "grad_norm": 0.32688966393470764, "learning_rate": 1.3536832638643693e-08, "loss": 0.3142, "step": 6314 }, { "epoch": 2.936753991629205, "grad_norm": 0.3033595681190491, "learning_rate": 1.333858168224178e-08, "loss": 0.3131, "step": 6315 }, { "epoch": 2.937219035808402, "grad_norm": 0.3291476368904114, "learning_rate": 1.3141791237969991e-08, "loss": 0.3139, "step": 6316 }, { "epoch": 2.9376840799875987, "grad_norm": 0.3021133542060852, "learning_rate": 1.2946461363465557e-08, "loss": 0.2932, "step": 6317 }, { "epoch": 2.938149124166796, "grad_norm": 0.32439282536506653, "learning_rate": 1.2752592115936601e-08, "loss": 0.3559, "step": 6318 }, { "epoch": 2.938614168345993, "grad_norm": 0.3350219428539276, "learning_rate": 1.2560183552164928e-08, "loss": 0.3485, "step": 6319 }, { "epoch": 2.93907921252519, "grad_norm": 0.3032825291156769, "learning_rate": 1.2369235728503792e-08, "loss": 0.315, "step": 6320 }, { "epoch": 2.939544256704387, "grad_norm": 0.311538428068161, "learning_rate": 1.2179748700879013e-08, "loss": 0.325, "step": 6321 }, { "epoch": 2.9400093008835837, "grad_norm": 0.3261773884296417, "learning_rate": 1.1991722524787307e-08, "loss": 0.3301, "step": 6322 }, { "epoch": 2.940474345062781, "grad_norm": 0.33156725764274597, "learning_rate": 1.1805157255299626e-08, "loss": 0.3229, "step": 6323 }, { "epoch": 2.940939389241978, "grad_norm": 0.3547118008136749, "learning_rate": 1.1620052947056703e-08, "loss": 0.2986, "step": 6324 }, { "epoch": 2.941404433421175, "grad_norm": 0.32039710879325867, "learning_rate": 1.1436409654273506e-08, "loss": 0.3285, "step": 6325 }, { "epoch": 2.941869477600372, "grad_norm": 0.31532758474349976, "learning_rate": 1.1254227430735898e-08, "loss": 0.3099, "step": 6326 }, { "epoch": 2.942334521779569, "grad_norm": 0.33241426944732666, "learning_rate": 1.1073506329802309e-08, "loss": 0.3177, "step": 6327 }, { "epoch": 2.942799565958766, "grad_norm": 0.3229113221168518, "learning_rate": 1.089424640440262e-08, "loss": 0.3215, "step": 6328 }, { "epoch": 2.9432646101379634, "grad_norm": 0.3332347273826599, "learning_rate": 1.0716447707039279e-08, "loss": 0.351, "step": 6329 }, { "epoch": 2.9437296543171603, "grad_norm": 0.3146977126598358, "learning_rate": 1.0540110289786742e-08, "loss": 0.3142, "step": 6330 }, { "epoch": 2.944194698496357, "grad_norm": 0.3125641942024231, "learning_rate": 1.0365234204291475e-08, "loss": 0.2814, "step": 6331 }, { "epoch": 2.944659742675554, "grad_norm": 0.32364359498023987, "learning_rate": 1.019181950177195e-08, "loss": 0.3173, "step": 6332 }, { "epoch": 2.945124786854751, "grad_norm": 0.3259218633174896, "learning_rate": 1.001986623301754e-08, "loss": 0.3478, "step": 6333 }, { "epoch": 2.9455898310339483, "grad_norm": 0.3398853540420532, "learning_rate": 9.849374448391846e-09, "loss": 0.3149, "step": 6334 }, { "epoch": 2.9460548752131452, "grad_norm": 0.31736600399017334, "learning_rate": 9.680344197828262e-09, "loss": 0.2984, "step": 6335 }, { "epoch": 2.946519919392342, "grad_norm": 0.3133545219898224, "learning_rate": 9.512775530833296e-09, "loss": 0.3267, "step": 6336 }, { "epoch": 2.9469849635715395, "grad_norm": 0.32863521575927734, "learning_rate": 9.346668496485468e-09, "loss": 0.3461, "step": 6337 }, { "epoch": 2.9474500077507364, "grad_norm": 0.3177638053894043, "learning_rate": 9.182023143434193e-09, "loss": 0.2977, "step": 6338 }, { "epoch": 2.9479150519299333, "grad_norm": 0.31308892369270325, "learning_rate": 9.018839519902012e-09, "loss": 0.3232, "step": 6339 }, { "epoch": 2.94838009610913, "grad_norm": 0.3451671600341797, "learning_rate": 8.857117673681804e-09, "loss": 0.3357, "step": 6340 }, { "epoch": 2.948845140288327, "grad_norm": 0.32019293308258057, "learning_rate": 8.696857652140677e-09, "loss": 0.3324, "step": 6341 }, { "epoch": 2.9493101844675245, "grad_norm": 0.33214157819747925, "learning_rate": 8.538059502214979e-09, "loss": 0.2954, "step": 6342 }, { "epoch": 2.9497752286467214, "grad_norm": 0.33576342463493347, "learning_rate": 8.380723270414726e-09, "loss": 0.3511, "step": 6343 }, { "epoch": 2.9502402728259183, "grad_norm": 0.32539987564086914, "learning_rate": 8.224849002820834e-09, "loss": 0.3342, "step": 6344 }, { "epoch": 2.9507053170051156, "grad_norm": 0.31811603903770447, "learning_rate": 8.07043674508623e-09, "loss": 0.3285, "step": 6345 }, { "epoch": 2.9511703611843125, "grad_norm": 0.30037549138069153, "learning_rate": 7.917486542436404e-09, "loss": 0.3136, "step": 6346 }, { "epoch": 2.9516354053635094, "grad_norm": 0.3149525225162506, "learning_rate": 7.765998439667743e-09, "loss": 0.3187, "step": 6347 }, { "epoch": 2.952100449542707, "grad_norm": 0.3301972448825836, "learning_rate": 7.615972481148094e-09, "loss": 0.329, "step": 6348 }, { "epoch": 2.9525654937219037, "grad_norm": 0.3264675736427307, "learning_rate": 7.467408710818414e-09, "loss": 0.335, "step": 6349 }, { "epoch": 2.9530305379011006, "grad_norm": 0.32105743885040283, "learning_rate": 7.320307172190011e-09, "loss": 0.3006, "step": 6350 }, { "epoch": 2.9534955820802975, "grad_norm": 0.3135838806629181, "learning_rate": 7.174667908346755e-09, "loss": 0.3276, "step": 6351 }, { "epoch": 2.9539606262594944, "grad_norm": 0.36441266536712646, "learning_rate": 7.0304909619439695e-09, "loss": 0.347, "step": 6352 }, { "epoch": 2.9544256704386918, "grad_norm": 0.32082051038742065, "learning_rate": 6.887776375208433e-09, "loss": 0.3151, "step": 6353 }, { "epoch": 2.9548907146178887, "grad_norm": 0.3240443170070648, "learning_rate": 6.74652418994004e-09, "loss": 0.3199, "step": 6354 }, { "epoch": 2.9553557587970856, "grad_norm": 0.31917044520378113, "learning_rate": 6.606734447507923e-09, "loss": 0.306, "step": 6355 }, { "epoch": 2.955820802976283, "grad_norm": 0.3240707218647003, "learning_rate": 6.4684071888554415e-09, "loss": 0.3439, "step": 6356 }, { "epoch": 2.95628584715548, "grad_norm": 0.3343864679336548, "learning_rate": 6.331542454495188e-09, "loss": 0.299, "step": 6357 }, { "epoch": 2.9567508913346767, "grad_norm": 0.3145192563533783, "learning_rate": 6.19614028451343e-09, "loss": 0.2969, "step": 6358 }, { "epoch": 2.957215935513874, "grad_norm": 0.3098326027393341, "learning_rate": 6.062200718567335e-09, "loss": 0.3037, "step": 6359 }, { "epoch": 2.957680979693071, "grad_norm": 0.3226139545440674, "learning_rate": 5.929723795884967e-09, "loss": 0.303, "step": 6360 }, { "epoch": 2.958146023872268, "grad_norm": 0.31032806634902954, "learning_rate": 5.798709555266957e-09, "loss": 0.3072, "step": 6361 }, { "epoch": 2.958611068051465, "grad_norm": 0.3300245702266693, "learning_rate": 5.669158035085387e-09, "loss": 0.3254, "step": 6362 }, { "epoch": 2.9590761122306617, "grad_norm": 0.30949413776397705, "learning_rate": 5.5410692732837946e-09, "loss": 0.3046, "step": 6363 }, { "epoch": 2.959541156409859, "grad_norm": 0.32829898595809937, "learning_rate": 5.414443307377171e-09, "loss": 0.3468, "step": 6364 }, { "epoch": 2.960006200589056, "grad_norm": 0.3059823215007782, "learning_rate": 5.2892801744525154e-09, "loss": 0.3097, "step": 6365 }, { "epoch": 2.960471244768253, "grad_norm": 0.3185559809207916, "learning_rate": 5.165579911167729e-09, "loss": 0.304, "step": 6366 }, { "epoch": 2.96093628894745, "grad_norm": 0.3140532076358795, "learning_rate": 5.043342553752717e-09, "loss": 0.2994, "step": 6367 }, { "epoch": 2.961401333126647, "grad_norm": 0.3156139552593231, "learning_rate": 4.922568138008843e-09, "loss": 0.3252, "step": 6368 }, { "epoch": 2.961866377305844, "grad_norm": 0.3156965374946594, "learning_rate": 4.803256699308923e-09, "loss": 0.3367, "step": 6369 }, { "epoch": 2.962331421485041, "grad_norm": 0.3089465796947479, "learning_rate": 4.685408272597225e-09, "loss": 0.3097, "step": 6370 }, { "epoch": 2.9627964656642383, "grad_norm": 0.3329167664051056, "learning_rate": 4.5690228923894744e-09, "loss": 0.3361, "step": 6371 }, { "epoch": 2.963261509843435, "grad_norm": 0.3192375600337982, "learning_rate": 4.454100592773958e-09, "loss": 0.3245, "step": 6372 }, { "epoch": 2.963726554022632, "grad_norm": 0.31667426228523254, "learning_rate": 4.340641407408752e-09, "loss": 0.3076, "step": 6373 }, { "epoch": 2.964191598201829, "grad_norm": 0.3276313841342926, "learning_rate": 4.228645369523943e-09, "loss": 0.3068, "step": 6374 }, { "epoch": 2.9646566423810263, "grad_norm": 0.3252321481704712, "learning_rate": 4.1181125119221785e-09, "loss": 0.3306, "step": 6375 }, { "epoch": 2.9651216865602232, "grad_norm": 0.3393605351448059, "learning_rate": 4.009042866976454e-09, "loss": 0.3405, "step": 6376 }, { "epoch": 2.96558673073942, "grad_norm": 0.3192119002342224, "learning_rate": 3.901436466631215e-09, "loss": 0.3125, "step": 6377 }, { "epoch": 2.9660517749186175, "grad_norm": 0.32561612129211426, "learning_rate": 3.795293342402917e-09, "loss": 0.331, "step": 6378 }, { "epoch": 2.9665168190978144, "grad_norm": 0.29712536931037903, "learning_rate": 3.690613525379472e-09, "loss": 0.3094, "step": 6379 }, { "epoch": 2.9669818632770113, "grad_norm": 0.3177908658981323, "learning_rate": 3.587397046219132e-09, "loss": 0.32, "step": 6380 }, { "epoch": 2.967446907456208, "grad_norm": 0.334224134683609, "learning_rate": 3.485643935152716e-09, "loss": 0.3327, "step": 6381 }, { "epoch": 2.967911951635405, "grad_norm": 0.3135557174682617, "learning_rate": 3.3853542219819403e-09, "loss": 0.3077, "step": 6382 }, { "epoch": 2.9683769958146025, "grad_norm": 0.3370562195777893, "learning_rate": 3.286527936079975e-09, "loss": 0.3069, "step": 6383 }, { "epoch": 2.9688420399937994, "grad_norm": 0.2906818091869354, "learning_rate": 3.1891651063920003e-09, "loss": 0.2884, "step": 6384 }, { "epoch": 2.9693070841729963, "grad_norm": 0.3011315166950226, "learning_rate": 3.0932657614329843e-09, "loss": 0.3154, "step": 6385 }, { "epoch": 2.9697721283521936, "grad_norm": 0.3322399854660034, "learning_rate": 2.998829929291569e-09, "loss": 0.3232, "step": 6386 }, { "epoch": 2.9702371725313905, "grad_norm": 0.31616201996803284, "learning_rate": 2.905857637625076e-09, "loss": 0.338, "step": 6387 }, { "epoch": 2.9707022167105874, "grad_norm": 0.2996443510055542, "learning_rate": 2.8143489136650547e-09, "loss": 0.2643, "step": 6388 }, { "epoch": 2.971167260889785, "grad_norm": 0.330763578414917, "learning_rate": 2.724303784211735e-09, "loss": 0.3372, "step": 6389 }, { "epoch": 2.9716323050689817, "grad_norm": 0.3177904486656189, "learning_rate": 2.635722275638464e-09, "loss": 0.3079, "step": 6390 }, { "epoch": 2.9720973492481786, "grad_norm": 0.32477807998657227, "learning_rate": 2.548604413888933e-09, "loss": 0.3184, "step": 6391 }, { "epoch": 2.9725623934273755, "grad_norm": 0.29389679431915283, "learning_rate": 2.462950224478844e-09, "loss": 0.2963, "step": 6392 }, { "epoch": 2.9730274376065724, "grad_norm": 0.3086054027080536, "learning_rate": 2.3787597324947953e-09, "loss": 0.3402, "step": 6393 }, { "epoch": 2.9734924817857697, "grad_norm": 0.2977517247200012, "learning_rate": 2.2960329625953957e-09, "loss": 0.3352, "step": 6394 }, { "epoch": 2.9739575259649667, "grad_norm": 0.31417447328567505, "learning_rate": 2.2147699390090425e-09, "loss": 0.3128, "step": 6395 }, { "epoch": 2.9744225701441636, "grad_norm": 0.3330288827419281, "learning_rate": 2.134970685536697e-09, "loss": 0.3592, "step": 6396 }, { "epoch": 2.974887614323361, "grad_norm": 0.30559176206588745, "learning_rate": 2.056635225550219e-09, "loss": 0.2894, "step": 6397 }, { "epoch": 2.975352658502558, "grad_norm": 0.31366166472435, "learning_rate": 1.9797635819934768e-09, "loss": 0.3098, "step": 6398 }, { "epoch": 2.9758177026817547, "grad_norm": 0.30758044123649597, "learning_rate": 1.904355777379574e-09, "loss": 0.2947, "step": 6399 }, { "epoch": 2.976282746860952, "grad_norm": 0.2907547652721405, "learning_rate": 1.830411833795287e-09, "loss": 0.298, "step": 6400 }, { "epoch": 2.976747791040149, "grad_norm": 0.3252081274986267, "learning_rate": 1.7579317728977363e-09, "loss": 0.3325, "step": 6401 }, { "epoch": 2.977212835219346, "grad_norm": 0.3026246428489685, "learning_rate": 1.6869156159143861e-09, "loss": 0.3322, "step": 6402 }, { "epoch": 2.977677879398543, "grad_norm": 0.3425191640853882, "learning_rate": 1.617363383645265e-09, "loss": 0.3364, "step": 6403 }, { "epoch": 2.9781429235777397, "grad_norm": 0.32005518674850464, "learning_rate": 1.549275096460745e-09, "loss": 0.332, "step": 6404 }, { "epoch": 2.978607967756937, "grad_norm": 0.2961218059062958, "learning_rate": 1.4826507743032071e-09, "loss": 0.2848, "step": 6405 }, { "epoch": 2.979073011936134, "grad_norm": 0.3002302944660187, "learning_rate": 1.417490436685376e-09, "loss": 0.3021, "step": 6406 }, { "epoch": 2.979538056115331, "grad_norm": 0.3076646625995636, "learning_rate": 1.3537941026914302e-09, "loss": 0.3196, "step": 6407 }, { "epoch": 2.980003100294528, "grad_norm": 0.29915478825569153, "learning_rate": 1.291561790978113e-09, "loss": 0.283, "step": 6408 }, { "epoch": 2.980468144473725, "grad_norm": 0.313909113407135, "learning_rate": 1.2307935197708453e-09, "loss": 0.3223, "step": 6409 }, { "epoch": 2.980933188652922, "grad_norm": 0.33077502250671387, "learning_rate": 1.1714893068687228e-09, "loss": 0.3171, "step": 6410 }, { "epoch": 2.981398232832119, "grad_norm": 0.3245851397514343, "learning_rate": 1.1136491696406293e-09, "loss": 0.3136, "step": 6411 }, { "epoch": 2.981863277011316, "grad_norm": 0.3183169364929199, "learning_rate": 1.057273125026903e-09, "loss": 0.3079, "step": 6412 }, { "epoch": 2.982328321190513, "grad_norm": 0.3176499605178833, "learning_rate": 1.0023611895393358e-09, "loss": 0.3185, "step": 6413 }, { "epoch": 2.98279336536971, "grad_norm": 0.29598644375801086, "learning_rate": 9.489133792611738e-10, "loss": 0.2992, "step": 6414 }, { "epoch": 2.983258409548907, "grad_norm": 0.32176917791366577, "learning_rate": 8.96929709845451e-10, "loss": 0.3118, "step": 6415 }, { "epoch": 2.9837234537281043, "grad_norm": 0.32616156339645386, "learning_rate": 8.464101965177662e-10, "loss": 0.342, "step": 6416 }, { "epoch": 2.9841884979073012, "grad_norm": 0.3436664640903473, "learning_rate": 7.973548540740616e-10, "loss": 0.323, "step": 6417 }, { "epoch": 2.984653542086498, "grad_norm": 0.3062160313129425, "learning_rate": 7.497636968828436e-10, "loss": 0.2861, "step": 6418 }, { "epoch": 2.9851185862656955, "grad_norm": 0.3244827389717102, "learning_rate": 7.036367388824073e-10, "loss": 0.3261, "step": 6419 }, { "epoch": 2.9855836304448924, "grad_norm": 0.3119675815105438, "learning_rate": 6.589739935819461e-10, "loss": 0.3165, "step": 6420 }, { "epoch": 2.9860486746240893, "grad_norm": 0.3089711666107178, "learning_rate": 6.157754740632183e-10, "loss": 0.3039, "step": 6421 }, { "epoch": 2.986513718803286, "grad_norm": 0.3472222089767456, "learning_rate": 5.7404119297777e-10, "loss": 0.2805, "step": 6422 }, { "epoch": 2.986978762982483, "grad_norm": 0.343313604593277, "learning_rate": 5.337711625497122e-10, "loss": 0.3385, "step": 6423 }, { "epoch": 2.9874438071616805, "grad_norm": 0.34118393063545227, "learning_rate": 4.949653945723886e-10, "loss": 0.3172, "step": 6424 }, { "epoch": 2.9879088513408774, "grad_norm": 0.331533282995224, "learning_rate": 4.576239004122629e-10, "loss": 0.3628, "step": 6425 }, { "epoch": 2.9883738955200743, "grad_norm": 0.30840861797332764, "learning_rate": 4.2174669100558673e-10, "loss": 0.2938, "step": 6426 }, { "epoch": 2.9888389396992716, "grad_norm": 0.3454267382621765, "learning_rate": 3.8733377686062115e-10, "loss": 0.3555, "step": 6427 }, { "epoch": 2.9893039838784685, "grad_norm": 0.3079105019569397, "learning_rate": 3.5438516805597067e-10, "loss": 0.3275, "step": 6428 }, { "epoch": 2.9897690280576654, "grad_norm": 0.32878822088241577, "learning_rate": 3.229008742416939e-10, "loss": 0.3502, "step": 6429 }, { "epoch": 2.9902340722368628, "grad_norm": 0.3611544072628021, "learning_rate": 2.928809046398584e-10, "loss": 0.3244, "step": 6430 }, { "epoch": 2.9906991164160597, "grad_norm": 0.3192159831523895, "learning_rate": 2.6432526804176517e-10, "loss": 0.3126, "step": 6431 }, { "epoch": 2.9911641605952566, "grad_norm": 0.353327214717865, "learning_rate": 2.3723397281127937e-10, "loss": 0.3284, "step": 6432 }, { "epoch": 2.9916292047744535, "grad_norm": 0.31189650297164917, "learning_rate": 2.1160702688260981e-10, "loss": 0.3192, "step": 6433 }, { "epoch": 2.9920942489536504, "grad_norm": 0.31306004524230957, "learning_rate": 1.8744443776252952e-10, "loss": 0.3168, "step": 6434 }, { "epoch": 2.9925592931328477, "grad_norm": 0.3070635497570038, "learning_rate": 1.6474621252704494e-10, "loss": 0.3385, "step": 6435 }, { "epoch": 2.9930243373120446, "grad_norm": 0.32101500034332275, "learning_rate": 1.435123578241715e-10, "loss": 0.3346, "step": 6436 }, { "epoch": 2.9934893814912416, "grad_norm": 0.3093291223049164, "learning_rate": 1.2374287987337864e-10, "loss": 0.3378, "step": 6437 }, { "epoch": 2.993954425670439, "grad_norm": 0.3259122371673584, "learning_rate": 1.0543778446392427e-10, "loss": 0.3228, "step": 6438 }, { "epoch": 2.994419469849636, "grad_norm": 0.31066158413887024, "learning_rate": 8.859707695818564e-11, "loss": 0.29, "step": 6439 }, { "epoch": 2.9948845140288327, "grad_norm": 0.31667444109916687, "learning_rate": 7.322076228777342e-11, "loss": 0.3338, "step": 6440 }, { "epoch": 2.9953495582080296, "grad_norm": 0.3047296404838562, "learning_rate": 5.93088449568624e-11, "loss": 0.2968, "step": 6441 }, { "epoch": 2.9958146023872265, "grad_norm": 0.3278631567955017, "learning_rate": 4.686132903886087e-11, "loss": 0.3333, "step": 6442 }, { "epoch": 2.996279646566424, "grad_norm": 0.3081776201725006, "learning_rate": 3.587821818085146e-11, "loss": 0.2953, "step": 6443 }, { "epoch": 2.9967446907456208, "grad_norm": 0.3233565092086792, "learning_rate": 2.6359515598595174e-11, "loss": 0.3309, "step": 6444 }, { "epoch": 2.9972097349248177, "grad_norm": 0.29509052634239197, "learning_rate": 1.8305224079862015e-11, "loss": 0.3168, "step": 6445 }, { "epoch": 2.997674779104015, "grad_norm": 0.3146141469478607, "learning_rate": 1.1715345984431026e-11, "loss": 0.3229, "step": 6446 }, { "epoch": 2.998139823283212, "grad_norm": 0.3134753406047821, "learning_rate": 6.5898832424249324e-12, "loss": 0.3339, "step": 6447 }, { "epoch": 2.998604867462409, "grad_norm": 0.30789443850517273, "learning_rate": 2.928837353755043e-12, "loss": 0.3139, "step": 6448 }, { "epoch": 2.999069911641606, "grad_norm": 0.31371957063674927, "learning_rate": 7.322093920070217e-13, "loss": 0.3199, "step": 6449 }, { "epoch": 2.999534955820803, "grad_norm": 0.3083919286727905, "learning_rate": 0.0, "loss": 0.3054, "step": 6450 }, { "epoch": 2.999534955820803, "step": 6450, "total_flos": 6039468541263872.0, "train_loss": 0.3618491206889929, "train_runtime": 197008.5242, "train_samples_per_second": 3.143, "train_steps_per_second": 0.033 } ], "logging_steps": 1.0, "max_steps": 6450, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6039468541263872.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }