{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2069, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00048344210780759005, "grad_norm": 2.4757904153326913, "learning_rate": 0.0, "loss": 0.9924, "step": 1 }, { "epoch": 0.0009668842156151801, "grad_norm": 2.3793618820940923, "learning_rate": 9.615384615384617e-08, "loss": 0.9738, "step": 2 }, { "epoch": 0.0014503263234227702, "grad_norm": 2.375691201697703, "learning_rate": 1.9230769230769234e-07, "loss": 0.9588, "step": 3 }, { "epoch": 0.0019337684312303602, "grad_norm": 2.3403619553808497, "learning_rate": 2.884615384615385e-07, "loss": 0.9862, "step": 4 }, { "epoch": 0.00241721053903795, "grad_norm": 2.3613475552419394, "learning_rate": 3.846153846153847e-07, "loss": 0.9758, "step": 5 }, { "epoch": 0.0029006526468455403, "grad_norm": 2.374422358129782, "learning_rate": 4.807692307692308e-07, "loss": 0.9716, "step": 6 }, { "epoch": 0.00338409475465313, "grad_norm": 2.478706471115894, "learning_rate": 5.76923076923077e-07, "loss": 0.976, "step": 7 }, { "epoch": 0.0038675368624607204, "grad_norm": 2.3811968693026198, "learning_rate": 6.730769230769231e-07, "loss": 0.9873, "step": 8 }, { "epoch": 0.00435097897026831, "grad_norm": 2.2147341913024956, "learning_rate": 7.692307692307694e-07, "loss": 0.9286, "step": 9 }, { "epoch": 0.0048344210780759, "grad_norm": 2.335255162349414, "learning_rate": 8.653846153846154e-07, "loss": 0.9845, "step": 10 }, { "epoch": 0.005317863185883491, "grad_norm": 2.218894644037587, "learning_rate": 9.615384615384617e-07, "loss": 0.95, "step": 11 }, { "epoch": 0.005801305293691081, "grad_norm": 2.2403773948516226, "learning_rate": 1.0576923076923078e-06, "loss": 0.9715, "step": 12 }, { "epoch": 0.0062847474014986705, "grad_norm": 2.164785866254398, "learning_rate": 1.153846153846154e-06, "loss": 0.9422, "step": 13 }, { "epoch": 0.00676818950930626, "grad_norm": 2.2075110374685947, "learning_rate": 1.25e-06, "loss": 0.965, "step": 14 }, { "epoch": 0.007251631617113851, "grad_norm": 1.9308798071113116, "learning_rate": 1.3461538461538462e-06, "loss": 0.9372, "step": 15 }, { "epoch": 0.007735073724921441, "grad_norm": 1.8705357350667309, "learning_rate": 1.4423076923076922e-06, "loss": 0.9443, "step": 16 }, { "epoch": 0.00821851583272903, "grad_norm": 1.775691766227149, "learning_rate": 1.5384615384615387e-06, "loss": 0.9362, "step": 17 }, { "epoch": 0.00870195794053662, "grad_norm": 1.7290053738093054, "learning_rate": 1.6346153846153848e-06, "loss": 0.9298, "step": 18 }, { "epoch": 0.00918540004834421, "grad_norm": 1.6541389612298973, "learning_rate": 1.7307692307692308e-06, "loss": 0.9336, "step": 19 }, { "epoch": 0.0096688421561518, "grad_norm": 1.2338607620225968, "learning_rate": 1.826923076923077e-06, "loss": 0.9055, "step": 20 }, { "epoch": 0.01015228426395939, "grad_norm": 1.1808086522456918, "learning_rate": 1.9230769230769234e-06, "loss": 0.8962, "step": 21 }, { "epoch": 0.010635726371766982, "grad_norm": 1.090531117286559, "learning_rate": 2.0192307692307692e-06, "loss": 0.8702, "step": 22 }, { "epoch": 0.011119168479574571, "grad_norm": 1.095517820053717, "learning_rate": 2.1153846153846155e-06, "loss": 0.8816, "step": 23 }, { "epoch": 0.011602610587382161, "grad_norm": 1.0208393908518454, "learning_rate": 2.211538461538462e-06, "loss": 0.8699, "step": 24 }, { "epoch": 0.012086052695189751, "grad_norm": 1.004109121666044, "learning_rate": 2.307692307692308e-06, "loss": 0.8669, "step": 25 }, { "epoch": 0.012569494802997341, "grad_norm": 0.98169412760157, "learning_rate": 2.403846153846154e-06, "loss": 0.8371, "step": 26 }, { "epoch": 0.01305293691080493, "grad_norm": 0.9209444270757048, "learning_rate": 2.5e-06, "loss": 0.8388, "step": 27 }, { "epoch": 0.01353637901861252, "grad_norm": 0.8619822284316448, "learning_rate": 2.5961538461538465e-06, "loss": 0.8041, "step": 28 }, { "epoch": 0.01401982112642011, "grad_norm": 0.9241232197315488, "learning_rate": 2.6923076923076923e-06, "loss": 0.8091, "step": 29 }, { "epoch": 0.014503263234227702, "grad_norm": 0.917429451582305, "learning_rate": 2.7884615384615386e-06, "loss": 0.7749, "step": 30 }, { "epoch": 0.014986705342035292, "grad_norm": 0.9043786370452085, "learning_rate": 2.8846153846153845e-06, "loss": 0.8144, "step": 31 }, { "epoch": 0.015470147449842882, "grad_norm": 0.820031258272968, "learning_rate": 2.980769230769231e-06, "loss": 0.7931, "step": 32 }, { "epoch": 0.01595358955765047, "grad_norm": 0.7726701862119408, "learning_rate": 3.0769230769230774e-06, "loss": 0.7903, "step": 33 }, { "epoch": 0.01643703166545806, "grad_norm": 0.6871638945331215, "learning_rate": 3.1730769230769233e-06, "loss": 0.7256, "step": 34 }, { "epoch": 0.01692047377326565, "grad_norm": 0.7302580243312591, "learning_rate": 3.2692307692307696e-06, "loss": 0.7956, "step": 35 }, { "epoch": 0.01740391588107324, "grad_norm": 0.6737498928543134, "learning_rate": 3.365384615384616e-06, "loss": 0.7478, "step": 36 }, { "epoch": 0.01788735798888083, "grad_norm": 0.6914440787905148, "learning_rate": 3.4615384615384617e-06, "loss": 0.7621, "step": 37 }, { "epoch": 0.01837080009668842, "grad_norm": 0.6869329802424697, "learning_rate": 3.557692307692308e-06, "loss": 0.7706, "step": 38 }, { "epoch": 0.01885424220449601, "grad_norm": 0.6549740713679569, "learning_rate": 3.653846153846154e-06, "loss": 0.7237, "step": 39 }, { "epoch": 0.0193376843123036, "grad_norm": 0.6922145753217636, "learning_rate": 3.7500000000000005e-06, "loss": 0.7537, "step": 40 }, { "epoch": 0.01982112642011119, "grad_norm": 0.665714225887781, "learning_rate": 3.846153846153847e-06, "loss": 0.7656, "step": 41 }, { "epoch": 0.02030456852791878, "grad_norm": 0.6390200477155564, "learning_rate": 3.942307692307692e-06, "loss": 0.7558, "step": 42 }, { "epoch": 0.020788010635726373, "grad_norm": 0.6558268717213803, "learning_rate": 4.0384615384615385e-06, "loss": 0.7408, "step": 43 }, { "epoch": 0.021271452743533963, "grad_norm": 0.6191043915893901, "learning_rate": 4.134615384615385e-06, "loss": 0.7482, "step": 44 }, { "epoch": 0.021754894851341553, "grad_norm": 0.6193196066166552, "learning_rate": 4.230769230769231e-06, "loss": 0.7424, "step": 45 }, { "epoch": 0.022238336959149143, "grad_norm": 0.6141046550876093, "learning_rate": 4.326923076923077e-06, "loss": 0.7372, "step": 46 }, { "epoch": 0.022721779066956733, "grad_norm": 0.6265315399192994, "learning_rate": 4.423076923076924e-06, "loss": 0.7362, "step": 47 }, { "epoch": 0.023205221174764323, "grad_norm": 0.6704953048927751, "learning_rate": 4.51923076923077e-06, "loss": 0.7326, "step": 48 }, { "epoch": 0.023688663282571912, "grad_norm": 0.6544522629648533, "learning_rate": 4.615384615384616e-06, "loss": 0.7275, "step": 49 }, { "epoch": 0.024172105390379502, "grad_norm": 0.6277879949612973, "learning_rate": 4.711538461538462e-06, "loss": 0.7311, "step": 50 }, { "epoch": 0.024655547498187092, "grad_norm": 0.5924725867824154, "learning_rate": 4.807692307692308e-06, "loss": 0.7261, "step": 51 }, { "epoch": 0.025138989605994682, "grad_norm": 0.591545350722231, "learning_rate": 4.903846153846154e-06, "loss": 0.7092, "step": 52 }, { "epoch": 0.025622431713802272, "grad_norm": 0.5698079528908845, "learning_rate": 5e-06, "loss": 0.7093, "step": 53 }, { "epoch": 0.02610587382160986, "grad_norm": 0.6162650749995418, "learning_rate": 5.096153846153846e-06, "loss": 0.687, "step": 54 }, { "epoch": 0.02658931592941745, "grad_norm": 0.5577930499697958, "learning_rate": 5.192307692307693e-06, "loss": 0.7143, "step": 55 }, { "epoch": 0.02707275803722504, "grad_norm": 0.5640209708881836, "learning_rate": 5.288461538461539e-06, "loss": 0.7059, "step": 56 }, { "epoch": 0.02755620014503263, "grad_norm": 0.5430126891143467, "learning_rate": 5.384615384615385e-06, "loss": 0.704, "step": 57 }, { "epoch": 0.02803964225284022, "grad_norm": 0.6009517822786309, "learning_rate": 5.480769230769232e-06, "loss": 0.7169, "step": 58 }, { "epoch": 0.02852308436064781, "grad_norm": 0.6167733540891279, "learning_rate": 5.576923076923077e-06, "loss": 0.7065, "step": 59 }, { "epoch": 0.029006526468455404, "grad_norm": 0.5731458421774205, "learning_rate": 5.6730769230769235e-06, "loss": 0.6373, "step": 60 }, { "epoch": 0.029489968576262994, "grad_norm": 0.5489403473955915, "learning_rate": 5.769230769230769e-06, "loss": 0.7018, "step": 61 }, { "epoch": 0.029973410684070584, "grad_norm": 0.5325599545502842, "learning_rate": 5.865384615384616e-06, "loss": 0.6959, "step": 62 }, { "epoch": 0.030456852791878174, "grad_norm": 0.5478537783639954, "learning_rate": 5.961538461538462e-06, "loss": 0.6896, "step": 63 }, { "epoch": 0.030940294899685764, "grad_norm": 0.5363515063211778, "learning_rate": 6.057692307692308e-06, "loss": 0.7014, "step": 64 }, { "epoch": 0.03142373700749335, "grad_norm": 0.5641946867306303, "learning_rate": 6.153846153846155e-06, "loss": 0.6903, "step": 65 }, { "epoch": 0.03190717911530094, "grad_norm": 0.5481835775113026, "learning_rate": 6.25e-06, "loss": 0.6893, "step": 66 }, { "epoch": 0.03239062122310853, "grad_norm": 0.5143476489389097, "learning_rate": 6.3461538461538466e-06, "loss": 0.6946, "step": 67 }, { "epoch": 0.03287406333091612, "grad_norm": 0.590656354467126, "learning_rate": 6.442307692307693e-06, "loss": 0.6788, "step": 68 }, { "epoch": 0.03335750543872371, "grad_norm": 0.5203563663337313, "learning_rate": 6.538461538461539e-06, "loss": 0.6847, "step": 69 }, { "epoch": 0.0338409475465313, "grad_norm": 0.5084500426939229, "learning_rate": 6.6346153846153846e-06, "loss": 0.7086, "step": 70 }, { "epoch": 0.03432438965433889, "grad_norm": 0.5032784264719405, "learning_rate": 6.730769230769232e-06, "loss": 0.6724, "step": 71 }, { "epoch": 0.03480783176214648, "grad_norm": 0.5205048813341548, "learning_rate": 6.826923076923078e-06, "loss": 0.6592, "step": 72 }, { "epoch": 0.03529127386995407, "grad_norm": 0.5066251849073853, "learning_rate": 6.923076923076923e-06, "loss": 0.6674, "step": 73 }, { "epoch": 0.03577471597776166, "grad_norm": 0.5305760257061701, "learning_rate": 7.01923076923077e-06, "loss": 0.6665, "step": 74 }, { "epoch": 0.03625815808556925, "grad_norm": 0.5583636863825877, "learning_rate": 7.115384615384616e-06, "loss": 0.6685, "step": 75 }, { "epoch": 0.03674160019337684, "grad_norm": 0.6055857508188283, "learning_rate": 7.211538461538462e-06, "loss": 0.6826, "step": 76 }, { "epoch": 0.03722504230118443, "grad_norm": 0.5576393446552599, "learning_rate": 7.307692307692308e-06, "loss": 0.6811, "step": 77 }, { "epoch": 0.03770848440899202, "grad_norm": 0.5550469150359895, "learning_rate": 7.403846153846155e-06, "loss": 0.6752, "step": 78 }, { "epoch": 0.03819192651679961, "grad_norm": 0.5095358853416947, "learning_rate": 7.500000000000001e-06, "loss": 0.657, "step": 79 }, { "epoch": 0.0386753686246072, "grad_norm": 0.519449515803278, "learning_rate": 7.5961538461538465e-06, "loss": 0.6326, "step": 80 }, { "epoch": 0.03915881073241479, "grad_norm": 0.5360371671954463, "learning_rate": 7.692307692307694e-06, "loss": 0.6577, "step": 81 }, { "epoch": 0.03964225284022238, "grad_norm": 0.5220981103197152, "learning_rate": 7.78846153846154e-06, "loss": 0.6803, "step": 82 }, { "epoch": 0.04012569494802997, "grad_norm": 0.5357442529489778, "learning_rate": 7.884615384615384e-06, "loss": 0.662, "step": 83 }, { "epoch": 0.04060913705583756, "grad_norm": 0.5922567510802571, "learning_rate": 7.980769230769232e-06, "loss": 0.6784, "step": 84 }, { "epoch": 0.04109257916364515, "grad_norm": 0.5471555288958341, "learning_rate": 8.076923076923077e-06, "loss": 0.663, "step": 85 }, { "epoch": 0.04157602127145275, "grad_norm": 0.5234614832210157, "learning_rate": 8.173076923076923e-06, "loss": 0.6633, "step": 86 }, { "epoch": 0.04205946337926034, "grad_norm": 0.5014680527453607, "learning_rate": 8.26923076923077e-06, "loss": 0.6345, "step": 87 }, { "epoch": 0.04254290548706793, "grad_norm": 0.5541925854592269, "learning_rate": 8.365384615384616e-06, "loss": 0.661, "step": 88 }, { "epoch": 0.043026347594875516, "grad_norm": 0.5556737957241218, "learning_rate": 8.461538461538462e-06, "loss": 0.6476, "step": 89 }, { "epoch": 0.043509789702683106, "grad_norm": 0.5358812925229628, "learning_rate": 8.557692307692308e-06, "loss": 0.6667, "step": 90 }, { "epoch": 0.043993231810490696, "grad_norm": 0.5285944473021625, "learning_rate": 8.653846153846155e-06, "loss": 0.6558, "step": 91 }, { "epoch": 0.044476673918298286, "grad_norm": 0.6130129115794695, "learning_rate": 8.750000000000001e-06, "loss": 0.6662, "step": 92 }, { "epoch": 0.044960116026105876, "grad_norm": 0.6086871477606206, "learning_rate": 8.846153846153847e-06, "loss": 0.6768, "step": 93 }, { "epoch": 0.045443558133913466, "grad_norm": 0.534737794998822, "learning_rate": 8.942307692307693e-06, "loss": 0.633, "step": 94 }, { "epoch": 0.045927000241721055, "grad_norm": 0.5048674854153722, "learning_rate": 9.03846153846154e-06, "loss": 0.6075, "step": 95 }, { "epoch": 0.046410442349528645, "grad_norm": 0.5516912026027078, "learning_rate": 9.134615384615384e-06, "loss": 0.623, "step": 96 }, { "epoch": 0.046893884457336235, "grad_norm": 0.6083291149980872, "learning_rate": 9.230769230769232e-06, "loss": 0.6556, "step": 97 }, { "epoch": 0.047377326565143825, "grad_norm": 0.5460750932826393, "learning_rate": 9.326923076923079e-06, "loss": 0.6524, "step": 98 }, { "epoch": 0.047860768672951415, "grad_norm": 0.5459534721301705, "learning_rate": 9.423076923076923e-06, "loss": 0.6449, "step": 99 }, { "epoch": 0.048344210780759005, "grad_norm": 0.5622412415254093, "learning_rate": 9.51923076923077e-06, "loss": 0.6517, "step": 100 }, { "epoch": 0.048827652888566594, "grad_norm": 0.6148179967646931, "learning_rate": 9.615384615384616e-06, "loss": 0.636, "step": 101 }, { "epoch": 0.049311094996374184, "grad_norm": 0.5377477077942675, "learning_rate": 9.711538461538462e-06, "loss": 0.6569, "step": 102 }, { "epoch": 0.049794537104181774, "grad_norm": 0.535881794576154, "learning_rate": 9.807692307692308e-06, "loss": 0.6515, "step": 103 }, { "epoch": 0.050277979211989364, "grad_norm": 0.5554528998874018, "learning_rate": 9.903846153846155e-06, "loss": 0.6471, "step": 104 }, { "epoch": 0.050761421319796954, "grad_norm": 0.5472055318440415, "learning_rate": 1e-05, "loss": 0.6212, "step": 105 }, { "epoch": 0.051244863427604544, "grad_norm": 0.5562423079812571, "learning_rate": 9.99999360979851e-06, "loss": 0.6483, "step": 106 }, { "epoch": 0.051728305535412133, "grad_norm": 0.6203972023036308, "learning_rate": 9.999974439210376e-06, "loss": 0.6474, "step": 107 }, { "epoch": 0.05221174764321972, "grad_norm": 0.5879110259866966, "learning_rate": 9.999942488284598e-06, "loss": 0.6506, "step": 108 }, { "epoch": 0.05269518975102731, "grad_norm": 0.5415023727684817, "learning_rate": 9.999897757102843e-06, "loss": 0.641, "step": 109 }, { "epoch": 0.0531786318588349, "grad_norm": 0.5661080832571289, "learning_rate": 9.99984024577945e-06, "loss": 0.6561, "step": 110 }, { "epoch": 0.05366207396664249, "grad_norm": 0.6384080684659277, "learning_rate": 9.999769954461425e-06, "loss": 0.6181, "step": 111 }, { "epoch": 0.05414551607445008, "grad_norm": 0.5321086465207798, "learning_rate": 9.999686883328433e-06, "loss": 0.6269, "step": 112 }, { "epoch": 0.05462895818225767, "grad_norm": 0.5658443861351884, "learning_rate": 9.999591032592813e-06, "loss": 0.6317, "step": 113 }, { "epoch": 0.05511240029006526, "grad_norm": 0.5759014415066968, "learning_rate": 9.999482402499569e-06, "loss": 0.6468, "step": 114 }, { "epoch": 0.05559584239787285, "grad_norm": 0.5480587710988183, "learning_rate": 9.999360993326366e-06, "loss": 0.6359, "step": 115 }, { "epoch": 0.05607928450568044, "grad_norm": 0.6380718424826206, "learning_rate": 9.999226805383534e-06, "loss": 0.6349, "step": 116 }, { "epoch": 0.05656272661348803, "grad_norm": 0.5246861209498886, "learning_rate": 9.999079839014074e-06, "loss": 0.6399, "step": 117 }, { "epoch": 0.05704616872129562, "grad_norm": 0.515686613549135, "learning_rate": 9.998920094593637e-06, "loss": 0.5984, "step": 118 }, { "epoch": 0.05752961082910321, "grad_norm": 0.5607127828178857, "learning_rate": 9.998747572530548e-06, "loss": 0.6398, "step": 119 }, { "epoch": 0.05801305293691081, "grad_norm": 0.5459763409466101, "learning_rate": 9.998562273265786e-06, "loss": 0.626, "step": 120 }, { "epoch": 0.0584964950447184, "grad_norm": 0.5525418327052581, "learning_rate": 9.998364197272988e-06, "loss": 0.6537, "step": 121 }, { "epoch": 0.05897993715252599, "grad_norm": 6.74083445541264, "learning_rate": 9.998153345058454e-06, "loss": 0.9475, "step": 122 }, { "epoch": 0.05946337926033358, "grad_norm": 0.6141628359508349, "learning_rate": 9.997929717161142e-06, "loss": 0.6473, "step": 123 }, { "epoch": 0.05994682136814117, "grad_norm": 0.6652958169663876, "learning_rate": 9.997693314152658e-06, "loss": 0.6342, "step": 124 }, { "epoch": 0.06043026347594876, "grad_norm": 0.606711498986106, "learning_rate": 9.99744413663727e-06, "loss": 0.623, "step": 125 }, { "epoch": 0.06091370558375635, "grad_norm": 0.5175309840849823, "learning_rate": 9.997182185251896e-06, "loss": 0.6221, "step": 126 }, { "epoch": 0.06139714769156394, "grad_norm": 0.5341566674562975, "learning_rate": 9.996907460666104e-06, "loss": 0.6357, "step": 127 }, { "epoch": 0.06188058979937153, "grad_norm": 0.5922792510376619, "learning_rate": 9.996619963582113e-06, "loss": 0.6043, "step": 128 }, { "epoch": 0.06236403190717912, "grad_norm": 0.5694036510960461, "learning_rate": 9.996319694734787e-06, "loss": 0.6311, "step": 129 }, { "epoch": 0.0628474740149867, "grad_norm": 0.5540844850790518, "learning_rate": 9.99600665489164e-06, "loss": 0.6411, "step": 130 }, { "epoch": 0.06333091612279429, "grad_norm": 0.5371960793753483, "learning_rate": 9.995680844852824e-06, "loss": 0.6403, "step": 131 }, { "epoch": 0.06381435823060189, "grad_norm": 0.5225384791967033, "learning_rate": 9.995342265451138e-06, "loss": 0.6269, "step": 132 }, { "epoch": 0.06429780033840947, "grad_norm": 0.6035451474536077, "learning_rate": 9.994990917552017e-06, "loss": 0.6321, "step": 133 }, { "epoch": 0.06478124244621707, "grad_norm": 0.6507380493478006, "learning_rate": 9.994626802053536e-06, "loss": 0.6236, "step": 134 }, { "epoch": 0.06526468455402465, "grad_norm": 0.5456651842881993, "learning_rate": 9.994249919886402e-06, "loss": 0.6258, "step": 135 }, { "epoch": 0.06574812666183225, "grad_norm": 0.5172506944070536, "learning_rate": 9.993860272013958e-06, "loss": 0.6162, "step": 136 }, { "epoch": 0.06623156876963984, "grad_norm": 0.6233262394445207, "learning_rate": 9.993457859432172e-06, "loss": 0.6261, "step": 137 }, { "epoch": 0.06671501087744743, "grad_norm": 0.6073445562745826, "learning_rate": 9.993042683169647e-06, "loss": 0.6371, "step": 138 }, { "epoch": 0.06719845298525502, "grad_norm": 0.5857241687958673, "learning_rate": 9.992614744287605e-06, "loss": 0.6275, "step": 139 }, { "epoch": 0.0676818950930626, "grad_norm": 0.5304150460003405, "learning_rate": 9.992174043879893e-06, "loss": 0.6175, "step": 140 }, { "epoch": 0.0681653372008702, "grad_norm": 0.5933722892089892, "learning_rate": 9.991720583072975e-06, "loss": 0.6255, "step": 141 }, { "epoch": 0.06864877930867778, "grad_norm": 0.561723953482763, "learning_rate": 9.991254363025935e-06, "loss": 0.6257, "step": 142 }, { "epoch": 0.06913222141648538, "grad_norm": 0.532228224452236, "learning_rate": 9.99077538493047e-06, "loss": 0.6301, "step": 143 }, { "epoch": 0.06961566352429296, "grad_norm": 0.541783938730816, "learning_rate": 9.990283650010883e-06, "loss": 0.619, "step": 144 }, { "epoch": 0.07009910563210056, "grad_norm": 0.5606995950440783, "learning_rate": 9.989779159524091e-06, "loss": 0.5818, "step": 145 }, { "epoch": 0.07058254773990814, "grad_norm": 0.5286741282148979, "learning_rate": 9.989261914759612e-06, "loss": 0.6105, "step": 146 }, { "epoch": 0.07106598984771574, "grad_norm": 0.525375741245272, "learning_rate": 9.988731917039564e-06, "loss": 0.6154, "step": 147 }, { "epoch": 0.07154943195552332, "grad_norm": 0.5132546936158348, "learning_rate": 9.988189167718665e-06, "loss": 0.5533, "step": 148 }, { "epoch": 0.07203287406333092, "grad_norm": 0.5330232205089095, "learning_rate": 9.987633668184227e-06, "loss": 0.6281, "step": 149 }, { "epoch": 0.0725163161711385, "grad_norm": 0.5459730729112252, "learning_rate": 9.98706541985615e-06, "loss": 0.5836, "step": 150 }, { "epoch": 0.0729997582789461, "grad_norm": 0.5818263727750432, "learning_rate": 9.986484424186922e-06, "loss": 0.6246, "step": 151 }, { "epoch": 0.07348320038675368, "grad_norm": 0.5754133435232375, "learning_rate": 9.985890682661616e-06, "loss": 0.6038, "step": 152 }, { "epoch": 0.07396664249456128, "grad_norm": 0.5528911744587542, "learning_rate": 9.985284196797884e-06, "loss": 0.6246, "step": 153 }, { "epoch": 0.07445008460236886, "grad_norm": 0.5484687585797547, "learning_rate": 9.984664968145953e-06, "loss": 0.6318, "step": 154 }, { "epoch": 0.07493352671017646, "grad_norm": 0.5351986552762329, "learning_rate": 9.984032998288617e-06, "loss": 0.6184, "step": 155 }, { "epoch": 0.07541696881798404, "grad_norm": 0.519416066205614, "learning_rate": 9.983388288841246e-06, "loss": 0.6185, "step": 156 }, { "epoch": 0.07590041092579164, "grad_norm": 0.5470449402548487, "learning_rate": 9.982730841451768e-06, "loss": 0.625, "step": 157 }, { "epoch": 0.07638385303359922, "grad_norm": 0.5887016805140373, "learning_rate": 9.982060657800672e-06, "loss": 0.6183, "step": 158 }, { "epoch": 0.07686729514140682, "grad_norm": 0.5522566946881194, "learning_rate": 9.981377739601002e-06, "loss": 0.6137, "step": 159 }, { "epoch": 0.0773507372492144, "grad_norm": 0.5411997809451911, "learning_rate": 9.980682088598349e-06, "loss": 0.6229, "step": 160 }, { "epoch": 0.077834179357022, "grad_norm": 0.52840707851752, "learning_rate": 9.979973706570856e-06, "loss": 0.614, "step": 161 }, { "epoch": 0.07831762146482958, "grad_norm": 0.6047062373713257, "learning_rate": 9.979252595329204e-06, "loss": 0.6222, "step": 162 }, { "epoch": 0.07880106357263718, "grad_norm": 0.5420471794760692, "learning_rate": 9.978518756716611e-06, "loss": 0.5856, "step": 163 }, { "epoch": 0.07928450568044476, "grad_norm": 0.5857386315586672, "learning_rate": 9.977772192608827e-06, "loss": 0.6291, "step": 164 }, { "epoch": 0.07976794778825236, "grad_norm": 0.5691356356316107, "learning_rate": 9.977012904914133e-06, "loss": 0.6149, "step": 165 }, { "epoch": 0.08025138989605994, "grad_norm": 0.5823273363045892, "learning_rate": 9.976240895573326e-06, "loss": 0.6147, "step": 166 }, { "epoch": 0.08073483200386754, "grad_norm": 0.538212010864403, "learning_rate": 9.975456166559725e-06, "loss": 0.6002, "step": 167 }, { "epoch": 0.08121827411167512, "grad_norm": 0.601371610274862, "learning_rate": 9.974658719879163e-06, "loss": 0.606, "step": 168 }, { "epoch": 0.08170171621948272, "grad_norm": 0.588104162701253, "learning_rate": 9.973848557569974e-06, "loss": 0.6226, "step": 169 }, { "epoch": 0.0821851583272903, "grad_norm": 0.5316828963553285, "learning_rate": 9.973025681703e-06, "loss": 0.6144, "step": 170 }, { "epoch": 0.0826686004350979, "grad_norm": 0.5405916050680715, "learning_rate": 9.972190094381578e-06, "loss": 0.6148, "step": 171 }, { "epoch": 0.0831520425429055, "grad_norm": 0.5102891757426009, "learning_rate": 9.971341797741538e-06, "loss": 0.616, "step": 172 }, { "epoch": 0.08363548465071308, "grad_norm": 0.5551757535954606, "learning_rate": 9.970480793951194e-06, "loss": 0.6196, "step": 173 }, { "epoch": 0.08411892675852067, "grad_norm": 0.5349760515746151, "learning_rate": 9.96960708521134e-06, "loss": 0.5902, "step": 174 }, { "epoch": 0.08460236886632826, "grad_norm": 0.5713299053870873, "learning_rate": 9.968720673755246e-06, "loss": 0.6039, "step": 175 }, { "epoch": 0.08508581097413585, "grad_norm": 0.5886201187493544, "learning_rate": 9.96782156184865e-06, "loss": 0.6128, "step": 176 }, { "epoch": 0.08556925308194344, "grad_norm": 0.52487297166769, "learning_rate": 9.966909751789758e-06, "loss": 0.6201, "step": 177 }, { "epoch": 0.08605269518975103, "grad_norm": 0.47488673856360863, "learning_rate": 9.965985245909226e-06, "loss": 0.581, "step": 178 }, { "epoch": 0.08653613729755862, "grad_norm": 0.5390345004627665, "learning_rate": 9.96504804657017e-06, "loss": 0.5748, "step": 179 }, { "epoch": 0.08701957940536621, "grad_norm": 0.5030595297893009, "learning_rate": 9.964098156168143e-06, "loss": 0.6025, "step": 180 }, { "epoch": 0.0875030215131738, "grad_norm": 0.5468598312459072, "learning_rate": 9.963135577131144e-06, "loss": 0.6086, "step": 181 }, { "epoch": 0.08798646362098139, "grad_norm": 0.48113219800404783, "learning_rate": 9.962160311919601e-06, "loss": 0.5759, "step": 182 }, { "epoch": 0.08846990572878898, "grad_norm": 0.5498772940672643, "learning_rate": 9.96117236302637e-06, "loss": 0.6009, "step": 183 }, { "epoch": 0.08895334783659657, "grad_norm": 0.572150853367621, "learning_rate": 9.960171732976731e-06, "loss": 0.5891, "step": 184 }, { "epoch": 0.08943678994440415, "grad_norm": 0.5440182913032069, "learning_rate": 9.959158424328373e-06, "loss": 0.6126, "step": 185 }, { "epoch": 0.08992023205221175, "grad_norm": 0.5124606491120447, "learning_rate": 9.958132439671392e-06, "loss": 0.6113, "step": 186 }, { "epoch": 0.09040367416001933, "grad_norm": 0.5122426086233111, "learning_rate": 9.957093781628294e-06, "loss": 0.5585, "step": 187 }, { "epoch": 0.09088711626782693, "grad_norm": 0.5466339032920954, "learning_rate": 9.956042452853967e-06, "loss": 0.5829, "step": 188 }, { "epoch": 0.09137055837563451, "grad_norm": 0.5319185267267565, "learning_rate": 9.954978456035695e-06, "loss": 0.6014, "step": 189 }, { "epoch": 0.09185400048344211, "grad_norm": 0.5439360347029544, "learning_rate": 9.953901793893137e-06, "loss": 0.6135, "step": 190 }, { "epoch": 0.0923374425912497, "grad_norm": 0.5572467498872743, "learning_rate": 9.95281246917833e-06, "loss": 0.6126, "step": 191 }, { "epoch": 0.09282088469905729, "grad_norm": 0.5541110285684123, "learning_rate": 9.951710484675677e-06, "loss": 0.6077, "step": 192 }, { "epoch": 0.09330432680686487, "grad_norm": 0.4850481807152515, "learning_rate": 9.950595843201936e-06, "loss": 0.6052, "step": 193 }, { "epoch": 0.09378776891467247, "grad_norm": 0.4982494369774088, "learning_rate": 9.949468547606222e-06, "loss": 0.608, "step": 194 }, { "epoch": 0.09427121102248005, "grad_norm": 0.5222210926075901, "learning_rate": 9.948328600769996e-06, "loss": 0.5725, "step": 195 }, { "epoch": 0.09475465313028765, "grad_norm": 0.5156665548407187, "learning_rate": 9.94717600560705e-06, "loss": 0.5981, "step": 196 }, { "epoch": 0.09523809523809523, "grad_norm": 0.4789398218595176, "learning_rate": 9.946010765063512e-06, "loss": 0.6163, "step": 197 }, { "epoch": 0.09572153734590283, "grad_norm": 0.5066106303118647, "learning_rate": 9.94483288211783e-06, "loss": 0.6049, "step": 198 }, { "epoch": 0.09620497945371041, "grad_norm": 0.519086410125638, "learning_rate": 9.943642359780767e-06, "loss": 0.6034, "step": 199 }, { "epoch": 0.09668842156151801, "grad_norm": 0.5726309849663989, "learning_rate": 9.942439201095398e-06, "loss": 0.5977, "step": 200 }, { "epoch": 0.09717186366932559, "grad_norm": 0.5149014744932526, "learning_rate": 9.941223409137088e-06, "loss": 0.6147, "step": 201 }, { "epoch": 0.09765530577713319, "grad_norm": 0.5009166664227639, "learning_rate": 9.939994987013505e-06, "loss": 0.595, "step": 202 }, { "epoch": 0.09813874788494077, "grad_norm": 0.5677576117209191, "learning_rate": 9.93875393786459e-06, "loss": 0.5825, "step": 203 }, { "epoch": 0.09862218999274837, "grad_norm": 0.5705628060741978, "learning_rate": 9.937500264862567e-06, "loss": 0.6106, "step": 204 }, { "epoch": 0.09910563210055595, "grad_norm": 0.5166084751955315, "learning_rate": 9.936233971211926e-06, "loss": 0.5724, "step": 205 }, { "epoch": 0.09958907420836355, "grad_norm": 0.4998369485071646, "learning_rate": 9.934955060149413e-06, "loss": 0.5702, "step": 206 }, { "epoch": 0.10007251631617115, "grad_norm": 0.4773861112208611, "learning_rate": 9.933663534944029e-06, "loss": 0.5976, "step": 207 }, { "epoch": 0.10055595842397873, "grad_norm": 0.5142399648385931, "learning_rate": 9.932359398897018e-06, "loss": 0.5662, "step": 208 }, { "epoch": 0.10103940053178632, "grad_norm": 0.5152331134346968, "learning_rate": 9.931042655341856e-06, "loss": 0.5987, "step": 209 }, { "epoch": 0.10152284263959391, "grad_norm": 0.5697107336495173, "learning_rate": 9.929713307644245e-06, "loss": 0.5956, "step": 210 }, { "epoch": 0.1020062847474015, "grad_norm": 0.5437386464507225, "learning_rate": 9.928371359202103e-06, "loss": 0.6023, "step": 211 }, { "epoch": 0.10248972685520909, "grad_norm": 0.48930400532530816, "learning_rate": 9.927016813445562e-06, "loss": 0.5941, "step": 212 }, { "epoch": 0.10297316896301668, "grad_norm": 0.5691895002113943, "learning_rate": 9.925649673836949e-06, "loss": 0.5977, "step": 213 }, { "epoch": 0.10345661107082427, "grad_norm": 0.518358121778254, "learning_rate": 9.924269943870781e-06, "loss": 0.599, "step": 214 }, { "epoch": 0.10394005317863186, "grad_norm": 0.5179203447080591, "learning_rate": 9.922877627073763e-06, "loss": 0.565, "step": 215 }, { "epoch": 0.10442349528643945, "grad_norm": 0.5348210146349037, "learning_rate": 9.921472727004765e-06, "loss": 0.6038, "step": 216 }, { "epoch": 0.10490693739424704, "grad_norm": 0.5011388091471438, "learning_rate": 9.920055247254827e-06, "loss": 0.5951, "step": 217 }, { "epoch": 0.10539037950205463, "grad_norm": 0.5706178448892886, "learning_rate": 9.91862519144714e-06, "loss": 0.604, "step": 218 }, { "epoch": 0.10587382160986222, "grad_norm": 0.5667257328777994, "learning_rate": 9.917182563237045e-06, "loss": 0.6006, "step": 219 }, { "epoch": 0.1063572637176698, "grad_norm": 0.5402529870671051, "learning_rate": 9.915727366312012e-06, "loss": 0.591, "step": 220 }, { "epoch": 0.1068407058254774, "grad_norm": 0.5123066262170495, "learning_rate": 9.914259604391642e-06, "loss": 0.5818, "step": 221 }, { "epoch": 0.10732414793328499, "grad_norm": 0.5104812232878251, "learning_rate": 9.912779281227656e-06, "loss": 0.5991, "step": 222 }, { "epoch": 0.10780759004109258, "grad_norm": 0.5073553912253322, "learning_rate": 9.911286400603878e-06, "loss": 0.5783, "step": 223 }, { "epoch": 0.10829103214890017, "grad_norm": 0.5516103650201469, "learning_rate": 9.90978096633623e-06, "loss": 0.6007, "step": 224 }, { "epoch": 0.10877447425670776, "grad_norm": 0.5241670992889956, "learning_rate": 9.908262982272724e-06, "loss": 0.5865, "step": 225 }, { "epoch": 0.10925791636451535, "grad_norm": 0.4894067875331202, "learning_rate": 9.906732452293448e-06, "loss": 0.5635, "step": 226 }, { "epoch": 0.10974135847232294, "grad_norm": 0.5079732216995924, "learning_rate": 9.905189380310564e-06, "loss": 0.5982, "step": 227 }, { "epoch": 0.11022480058013052, "grad_norm": 0.47288266380376864, "learning_rate": 9.903633770268286e-06, "loss": 0.5734, "step": 228 }, { "epoch": 0.11070824268793812, "grad_norm": 0.5195973051222883, "learning_rate": 9.902065626142876e-06, "loss": 0.6021, "step": 229 }, { "epoch": 0.1111916847957457, "grad_norm": 0.5159734590151601, "learning_rate": 9.900484951942642e-06, "loss": 0.5847, "step": 230 }, { "epoch": 0.1116751269035533, "grad_norm": 0.5157347113387764, "learning_rate": 9.89889175170791e-06, "loss": 0.5946, "step": 231 }, { "epoch": 0.11215856901136088, "grad_norm": 0.49409523170190334, "learning_rate": 9.89728602951103e-06, "loss": 0.5941, "step": 232 }, { "epoch": 0.11264201111916848, "grad_norm": 0.5699641967141135, "learning_rate": 9.89566778945636e-06, "loss": 0.5965, "step": 233 }, { "epoch": 0.11312545322697606, "grad_norm": 0.5565932357020583, "learning_rate": 9.894037035680246e-06, "loss": 0.6076, "step": 234 }, { "epoch": 0.11360889533478366, "grad_norm": 0.4762368359891958, "learning_rate": 9.892393772351033e-06, "loss": 0.5749, "step": 235 }, { "epoch": 0.11409233744259124, "grad_norm": 0.5226269336653058, "learning_rate": 9.890738003669029e-06, "loss": 0.5882, "step": 236 }, { "epoch": 0.11457577955039884, "grad_norm": 0.5893232226185929, "learning_rate": 9.889069733866515e-06, "loss": 0.5978, "step": 237 }, { "epoch": 0.11505922165820642, "grad_norm": 0.5556325697280562, "learning_rate": 9.887388967207722e-06, "loss": 0.6, "step": 238 }, { "epoch": 0.11554266376601402, "grad_norm": 0.48160661753964396, "learning_rate": 9.885695707988825e-06, "loss": 0.5977, "step": 239 }, { "epoch": 0.11602610587382162, "grad_norm": 0.5122405505133801, "learning_rate": 9.883989960537934e-06, "loss": 0.6044, "step": 240 }, { "epoch": 0.1165095479816292, "grad_norm": 0.5812889541684825, "learning_rate": 9.882271729215071e-06, "loss": 0.5849, "step": 241 }, { "epoch": 0.1169929900894368, "grad_norm": 0.4906401332764143, "learning_rate": 9.880541018412179e-06, "loss": 0.5986, "step": 242 }, { "epoch": 0.11747643219724438, "grad_norm": 0.48951055967126716, "learning_rate": 9.878797832553093e-06, "loss": 0.5646, "step": 243 }, { "epoch": 0.11795987430505198, "grad_norm": 0.4836474446158179, "learning_rate": 9.877042176093537e-06, "loss": 0.5998, "step": 244 }, { "epoch": 0.11844331641285956, "grad_norm": 0.4962973453940785, "learning_rate": 9.875274053521107e-06, "loss": 0.5846, "step": 245 }, { "epoch": 0.11892675852066716, "grad_norm": 0.45261755838242107, "learning_rate": 9.873493469355271e-06, "loss": 0.5912, "step": 246 }, { "epoch": 0.11941020062847474, "grad_norm": 0.49934013758424506, "learning_rate": 9.871700428147342e-06, "loss": 0.5836, "step": 247 }, { "epoch": 0.11989364273628234, "grad_norm": 0.48318779237357384, "learning_rate": 9.86989493448048e-06, "loss": 0.5898, "step": 248 }, { "epoch": 0.12037708484408992, "grad_norm": 0.4877998807669757, "learning_rate": 9.868076992969672e-06, "loss": 0.5933, "step": 249 }, { "epoch": 0.12086052695189752, "grad_norm": 0.4697579805390032, "learning_rate": 9.866246608261725e-06, "loss": 0.5855, "step": 250 }, { "epoch": 0.1213439690597051, "grad_norm": 0.512552573820198, "learning_rate": 9.864403785035246e-06, "loss": 0.5989, "step": 251 }, { "epoch": 0.1218274111675127, "grad_norm": 0.4865753568683563, "learning_rate": 9.862548528000644e-06, "loss": 0.5722, "step": 252 }, { "epoch": 0.12231085327532028, "grad_norm": 0.5276925045930954, "learning_rate": 9.860680841900101e-06, "loss": 0.5879, "step": 253 }, { "epoch": 0.12279429538312787, "grad_norm": 0.5213718677505005, "learning_rate": 9.858800731507575e-06, "loss": 0.5999, "step": 254 }, { "epoch": 0.12327773749093546, "grad_norm": 0.5180845494091726, "learning_rate": 9.85690820162878e-06, "loss": 0.586, "step": 255 }, { "epoch": 0.12376117959874305, "grad_norm": 0.5698025401421347, "learning_rate": 9.855003257101177e-06, "loss": 0.6011, "step": 256 }, { "epoch": 0.12424462170655064, "grad_norm": 0.562343589994959, "learning_rate": 9.853085902793952e-06, "loss": 0.5894, "step": 257 }, { "epoch": 0.12472806381435823, "grad_norm": 0.5160827286882833, "learning_rate": 9.851156143608025e-06, "loss": 0.5897, "step": 258 }, { "epoch": 0.12521150592216582, "grad_norm": 0.5407107287832078, "learning_rate": 9.84921398447601e-06, "loss": 0.59, "step": 259 }, { "epoch": 0.1256949480299734, "grad_norm": 0.4828245059112851, "learning_rate": 9.847259430362222e-06, "loss": 0.5642, "step": 260 }, { "epoch": 0.126178390137781, "grad_norm": 0.5766667340207283, "learning_rate": 9.845292486262664e-06, "loss": 0.6016, "step": 261 }, { "epoch": 0.12666183224558858, "grad_norm": 0.5818866932241936, "learning_rate": 9.843313157204999e-06, "loss": 0.5807, "step": 262 }, { "epoch": 0.12714527435339618, "grad_norm": 0.5140923007570054, "learning_rate": 9.841321448248552e-06, "loss": 0.5858, "step": 263 }, { "epoch": 0.12762871646120377, "grad_norm": 0.513399510660716, "learning_rate": 9.839317364484295e-06, "loss": 0.5847, "step": 264 }, { "epoch": 0.12811215856901137, "grad_norm": 0.5227642580781724, "learning_rate": 9.837300911034824e-06, "loss": 0.5888, "step": 265 }, { "epoch": 0.12859560067681894, "grad_norm": 0.5579358896097371, "learning_rate": 9.83527209305436e-06, "loss": 0.5928, "step": 266 }, { "epoch": 0.12907904278462654, "grad_norm": 0.5145348442577231, "learning_rate": 9.83323091572872e-06, "loss": 0.5872, "step": 267 }, { "epoch": 0.12956248489243413, "grad_norm": 0.5112821410236051, "learning_rate": 9.831177384275323e-06, "loss": 0.5805, "step": 268 }, { "epoch": 0.13004592700024173, "grad_norm": 0.5497912960403669, "learning_rate": 9.829111503943159e-06, "loss": 0.5837, "step": 269 }, { "epoch": 0.1305293691080493, "grad_norm": 0.5226743950335115, "learning_rate": 9.827033280012783e-06, "loss": 0.5539, "step": 270 }, { "epoch": 0.1310128112158569, "grad_norm": 0.5713921241049837, "learning_rate": 9.824942717796304e-06, "loss": 0.5881, "step": 271 }, { "epoch": 0.1314962533236645, "grad_norm": 0.5241764388189555, "learning_rate": 9.822839822637369e-06, "loss": 0.6032, "step": 272 }, { "epoch": 0.1319796954314721, "grad_norm": 0.5162440352522167, "learning_rate": 9.820724599911147e-06, "loss": 0.5842, "step": 273 }, { "epoch": 0.13246313753927969, "grad_norm": 0.5431692492650363, "learning_rate": 9.818597055024315e-06, "loss": 0.585, "step": 274 }, { "epoch": 0.13294657964708725, "grad_norm": 0.5124783198553914, "learning_rate": 9.816457193415055e-06, "loss": 0.5779, "step": 275 }, { "epoch": 0.13343002175489485, "grad_norm": 0.5257695390265421, "learning_rate": 9.81430502055302e-06, "loss": 0.5798, "step": 276 }, { "epoch": 0.13391346386270245, "grad_norm": 0.49781008962990064, "learning_rate": 9.812140541939338e-06, "loss": 0.5836, "step": 277 }, { "epoch": 0.13439690597051004, "grad_norm": 0.5327804269781539, "learning_rate": 9.809963763106593e-06, "loss": 0.5733, "step": 278 }, { "epoch": 0.1348803480783176, "grad_norm": 0.5167258655366103, "learning_rate": 9.807774689618806e-06, "loss": 0.58, "step": 279 }, { "epoch": 0.1353637901861252, "grad_norm": 0.5321175943512093, "learning_rate": 9.805573327071428e-06, "loss": 0.5911, "step": 280 }, { "epoch": 0.1358472322939328, "grad_norm": 0.49961117510050285, "learning_rate": 9.803359681091313e-06, "loss": 0.5737, "step": 281 }, { "epoch": 0.1363306744017404, "grad_norm": 0.5314962622355859, "learning_rate": 9.801133757336726e-06, "loss": 0.593, "step": 282 }, { "epoch": 0.13681411650954797, "grad_norm": 0.48173417582091976, "learning_rate": 9.798895561497299e-06, "loss": 0.5818, "step": 283 }, { "epoch": 0.13729755861735557, "grad_norm": 0.5127693228983886, "learning_rate": 9.796645099294049e-06, "loss": 0.6024, "step": 284 }, { "epoch": 0.13778100072516317, "grad_norm": 0.5128313174228813, "learning_rate": 9.794382376479334e-06, "loss": 0.5837, "step": 285 }, { "epoch": 0.13826444283297076, "grad_norm": 0.502862882638082, "learning_rate": 9.792107398836859e-06, "loss": 0.5781, "step": 286 }, { "epoch": 0.13874788494077833, "grad_norm": 0.5169656633134686, "learning_rate": 9.789820172181648e-06, "loss": 0.5821, "step": 287 }, { "epoch": 0.13923132704858593, "grad_norm": 6.7246508188992, "learning_rate": 9.787520702360035e-06, "loss": 1.0972, "step": 288 }, { "epoch": 0.13971476915639353, "grad_norm": 0.6005251051430991, "learning_rate": 9.785208995249655e-06, "loss": 0.5803, "step": 289 }, { "epoch": 0.14019821126420112, "grad_norm": 0.5531574758650235, "learning_rate": 9.782885056759413e-06, "loss": 0.563, "step": 290 }, { "epoch": 0.1406816533720087, "grad_norm": 0.5273779406180227, "learning_rate": 9.780548892829486e-06, "loss": 0.5872, "step": 291 }, { "epoch": 0.1411650954798163, "grad_norm": 0.5063770192301159, "learning_rate": 9.778200509431297e-06, "loss": 0.5782, "step": 292 }, { "epoch": 0.14164853758762389, "grad_norm": 0.5401099132225082, "learning_rate": 9.775839912567502e-06, "loss": 0.5804, "step": 293 }, { "epoch": 0.14213197969543148, "grad_norm": 0.607784811294971, "learning_rate": 9.773467108271978e-06, "loss": 0.5831, "step": 294 }, { "epoch": 0.14261542180323905, "grad_norm": 0.5051370116219928, "learning_rate": 9.771082102609803e-06, "loss": 0.5597, "step": 295 }, { "epoch": 0.14309886391104665, "grad_norm": 0.5723810352863865, "learning_rate": 9.768684901677245e-06, "loss": 0.5779, "step": 296 }, { "epoch": 0.14358230601885424, "grad_norm": 0.529491415132923, "learning_rate": 9.766275511601742e-06, "loss": 0.5849, "step": 297 }, { "epoch": 0.14406574812666184, "grad_norm": 0.6275998382003428, "learning_rate": 9.763853938541887e-06, "loss": 0.5915, "step": 298 }, { "epoch": 0.1445491902344694, "grad_norm": 0.5906428033404255, "learning_rate": 9.76142018868742e-06, "loss": 0.5816, "step": 299 }, { "epoch": 0.145032632342277, "grad_norm": 0.597638837356143, "learning_rate": 9.7589742682592e-06, "loss": 0.5578, "step": 300 }, { "epoch": 0.1455160744500846, "grad_norm": 0.5365546900890564, "learning_rate": 9.756516183509198e-06, "loss": 0.5833, "step": 301 }, { "epoch": 0.1459995165578922, "grad_norm": 0.554155920273677, "learning_rate": 9.754045940720471e-06, "loss": 0.581, "step": 302 }, { "epoch": 0.14648295866569977, "grad_norm": 0.5290449152773149, "learning_rate": 9.751563546207167e-06, "loss": 0.5879, "step": 303 }, { "epoch": 0.14696640077350737, "grad_norm": 0.5303051981230842, "learning_rate": 9.749069006314481e-06, "loss": 0.557, "step": 304 }, { "epoch": 0.14744984288131496, "grad_norm": 0.4750712434505446, "learning_rate": 9.74656232741866e-06, "loss": 0.5236, "step": 305 }, { "epoch": 0.14793328498912256, "grad_norm": 0.515780571537496, "learning_rate": 9.744043515926975e-06, "loss": 0.5827, "step": 306 }, { "epoch": 0.14841672709693013, "grad_norm": 0.5886066507830542, "learning_rate": 9.741512578277715e-06, "loss": 0.5741, "step": 307 }, { "epoch": 0.14890016920473773, "grad_norm": 0.5712616310834069, "learning_rate": 9.738969520940158e-06, "loss": 0.587, "step": 308 }, { "epoch": 0.14938361131254532, "grad_norm": 0.5883909446108012, "learning_rate": 9.736414350414564e-06, "loss": 0.5836, "step": 309 }, { "epoch": 0.14986705342035292, "grad_norm": 0.49300111186175044, "learning_rate": 9.733847073232156e-06, "loss": 0.583, "step": 310 }, { "epoch": 0.15035049552816052, "grad_norm": 0.47057695692490953, "learning_rate": 9.7312676959551e-06, "loss": 0.5433, "step": 311 }, { "epoch": 0.15083393763596809, "grad_norm": 0.5647156070035382, "learning_rate": 9.72867622517649e-06, "loss": 0.5859, "step": 312 }, { "epoch": 0.15131737974377568, "grad_norm": 0.5698749374107666, "learning_rate": 9.726072667520338e-06, "loss": 0.5759, "step": 313 }, { "epoch": 0.15180082185158328, "grad_norm": 0.4935935341959304, "learning_rate": 9.723457029641547e-06, "loss": 0.5883, "step": 314 }, { "epoch": 0.15228426395939088, "grad_norm": 0.5040267732247843, "learning_rate": 9.720829318225897e-06, "loss": 0.5723, "step": 315 }, { "epoch": 0.15276770606719844, "grad_norm": 0.5390674583456238, "learning_rate": 9.718189539990029e-06, "loss": 0.5748, "step": 316 }, { "epoch": 0.15325114817500604, "grad_norm": 0.5449958057788811, "learning_rate": 9.715537701681431e-06, "loss": 0.5831, "step": 317 }, { "epoch": 0.15373459028281364, "grad_norm": 0.48895966772949706, "learning_rate": 9.712873810078415e-06, "loss": 0.5505, "step": 318 }, { "epoch": 0.15421803239062123, "grad_norm": 0.5694877152526486, "learning_rate": 9.710197871990101e-06, "loss": 0.5789, "step": 319 }, { "epoch": 0.1547014744984288, "grad_norm": 0.5390854150150773, "learning_rate": 9.707509894256406e-06, "loss": 0.5699, "step": 320 }, { "epoch": 0.1551849166062364, "grad_norm": 0.5339825765060972, "learning_rate": 9.704809883748012e-06, "loss": 0.5841, "step": 321 }, { "epoch": 0.155668358714044, "grad_norm": 0.5691147363910026, "learning_rate": 9.70209784736637e-06, "loss": 0.5791, "step": 322 }, { "epoch": 0.1561518008218516, "grad_norm": 0.5098897525025804, "learning_rate": 9.699373792043658e-06, "loss": 0.5789, "step": 323 }, { "epoch": 0.15663524292965916, "grad_norm": 0.5233093422091403, "learning_rate": 9.696637724742785e-06, "loss": 0.5791, "step": 324 }, { "epoch": 0.15711868503746676, "grad_norm": 0.4951608627676522, "learning_rate": 9.693889652457359e-06, "loss": 0.5664, "step": 325 }, { "epoch": 0.15760212714527436, "grad_norm": 0.5085606430384619, "learning_rate": 9.691129582211671e-06, "loss": 0.5777, "step": 326 }, { "epoch": 0.15808556925308195, "grad_norm": 0.5137102450781047, "learning_rate": 9.688357521060685e-06, "loss": 0.5843, "step": 327 }, { "epoch": 0.15856901136088952, "grad_norm": 0.4769071854330559, "learning_rate": 9.685573476090015e-06, "loss": 0.578, "step": 328 }, { "epoch": 0.15905245346869712, "grad_norm": 0.542975418114207, "learning_rate": 9.6827774544159e-06, "loss": 0.5859, "step": 329 }, { "epoch": 0.15953589557650472, "grad_norm": 0.4926718305346952, "learning_rate": 9.6799694631852e-06, "loss": 0.5871, "step": 330 }, { "epoch": 0.1600193376843123, "grad_norm": 0.5010989320404932, "learning_rate": 9.677149509575365e-06, "loss": 0.5841, "step": 331 }, { "epoch": 0.16050277979211988, "grad_norm": 0.5446382005351177, "learning_rate": 9.674317600794426e-06, "loss": 0.5762, "step": 332 }, { "epoch": 0.16098622189992748, "grad_norm": 0.5406240370145704, "learning_rate": 9.67147374408097e-06, "loss": 0.5685, "step": 333 }, { "epoch": 0.16146966400773508, "grad_norm": 0.5171074604025283, "learning_rate": 9.66861794670412e-06, "loss": 0.5856, "step": 334 }, { "epoch": 0.16195310611554267, "grad_norm": 0.5545080974369176, "learning_rate": 9.665750215963528e-06, "loss": 0.5789, "step": 335 }, { "epoch": 0.16243654822335024, "grad_norm": 0.49939805294647144, "learning_rate": 9.662870559189344e-06, "loss": 0.5702, "step": 336 }, { "epoch": 0.16291999033115784, "grad_norm": 0.49295646596373777, "learning_rate": 9.6599789837422e-06, "loss": 0.5742, "step": 337 }, { "epoch": 0.16340343243896543, "grad_norm": 0.5522231456414357, "learning_rate": 9.657075497013202e-06, "loss": 0.5752, "step": 338 }, { "epoch": 0.16388687454677303, "grad_norm": 0.5606395929711875, "learning_rate": 9.654160106423891e-06, "loss": 0.5854, "step": 339 }, { "epoch": 0.1643703166545806, "grad_norm": 0.5086990809592122, "learning_rate": 9.651232819426242e-06, "loss": 0.5764, "step": 340 }, { "epoch": 0.1648537587623882, "grad_norm": 0.4984930367771814, "learning_rate": 9.648293643502636e-06, "loss": 0.5619, "step": 341 }, { "epoch": 0.1653372008701958, "grad_norm": 0.5217470426797576, "learning_rate": 9.645342586165845e-06, "loss": 0.5833, "step": 342 }, { "epoch": 0.1658206429780034, "grad_norm": 0.546389261380125, "learning_rate": 9.642379654959006e-06, "loss": 0.5381, "step": 343 }, { "epoch": 0.166304085085811, "grad_norm": 0.5439151860872452, "learning_rate": 9.639404857455614e-06, "loss": 0.5674, "step": 344 }, { "epoch": 0.16678752719361856, "grad_norm": 0.5469688158149608, "learning_rate": 9.63641820125949e-06, "loss": 0.5705, "step": 345 }, { "epoch": 0.16727096930142615, "grad_norm": 0.4994352161741759, "learning_rate": 9.633419694004767e-06, "loss": 0.555, "step": 346 }, { "epoch": 0.16775441140923375, "grad_norm": 0.5270157823994652, "learning_rate": 9.63040934335587e-06, "loss": 0.5741, "step": 347 }, { "epoch": 0.16823785351704135, "grad_norm": 0.5302701119307424, "learning_rate": 9.627387157007502e-06, "loss": 0.5775, "step": 348 }, { "epoch": 0.16872129562484892, "grad_norm": 0.5005904286760833, "learning_rate": 9.624353142684611e-06, "loss": 0.5724, "step": 349 }, { "epoch": 0.1692047377326565, "grad_norm": 0.5035595085634601, "learning_rate": 9.621307308142385e-06, "loss": 0.5794, "step": 350 }, { "epoch": 0.1696881798404641, "grad_norm": 0.521381746170865, "learning_rate": 9.618249661166218e-06, "loss": 0.5764, "step": 351 }, { "epoch": 0.1701716219482717, "grad_norm": 0.48214165657815927, "learning_rate": 9.615180209571709e-06, "loss": 0.5804, "step": 352 }, { "epoch": 0.17065506405607928, "grad_norm": 0.47552991671065514, "learning_rate": 9.612098961204617e-06, "loss": 0.5581, "step": 353 }, { "epoch": 0.17113850616388687, "grad_norm": 0.46097880469562935, "learning_rate": 9.609005923940865e-06, "loss": 0.5618, "step": 354 }, { "epoch": 0.17162194827169447, "grad_norm": 0.5629931104502605, "learning_rate": 9.605901105686503e-06, "loss": 0.5694, "step": 355 }, { "epoch": 0.17210539037950207, "grad_norm": 0.5179757776717347, "learning_rate": 9.602784514377701e-06, "loss": 0.5897, "step": 356 }, { "epoch": 0.17258883248730963, "grad_norm": 0.5355839686571028, "learning_rate": 9.599656157980715e-06, "loss": 0.5724, "step": 357 }, { "epoch": 0.17307227459511723, "grad_norm": 0.6350286695754506, "learning_rate": 9.596516044491873e-06, "loss": 0.577, "step": 358 }, { "epoch": 0.17355571670292483, "grad_norm": 0.5295601313068036, "learning_rate": 9.593364181937563e-06, "loss": 0.5834, "step": 359 }, { "epoch": 0.17403915881073242, "grad_norm": 0.5016272467409, "learning_rate": 9.590200578374198e-06, "loss": 0.5848, "step": 360 }, { "epoch": 0.17452260091854, "grad_norm": 0.4734403734457174, "learning_rate": 9.587025241888202e-06, "loss": 0.5629, "step": 361 }, { "epoch": 0.1750060430263476, "grad_norm": 0.5345541955737336, "learning_rate": 9.583838180595993e-06, "loss": 0.5619, "step": 362 }, { "epoch": 0.1754894851341552, "grad_norm": 0.5159159294276754, "learning_rate": 9.580639402643957e-06, "loss": 0.5788, "step": 363 }, { "epoch": 0.17597292724196278, "grad_norm": 0.5475730953848408, "learning_rate": 9.577428916208426e-06, "loss": 0.5758, "step": 364 }, { "epoch": 0.17645636934977035, "grad_norm": 0.5065491502971655, "learning_rate": 9.574206729495662e-06, "loss": 0.5739, "step": 365 }, { "epoch": 0.17693981145757795, "grad_norm": 0.5385122338140608, "learning_rate": 9.570972850741839e-06, "loss": 0.5646, "step": 366 }, { "epoch": 0.17742325356538555, "grad_norm": 0.5282114345918013, "learning_rate": 9.567727288213005e-06, "loss": 0.5809, "step": 367 }, { "epoch": 0.17790669567319314, "grad_norm": 0.5183724179001736, "learning_rate": 9.564470050205084e-06, "loss": 0.5745, "step": 368 }, { "epoch": 0.1783901377810007, "grad_norm": 0.501228022506401, "learning_rate": 9.561201145043835e-06, "loss": 0.5759, "step": 369 }, { "epoch": 0.1788735798888083, "grad_norm": 0.5161478035704796, "learning_rate": 9.557920581084848e-06, "loss": 0.5716, "step": 370 }, { "epoch": 0.1793570219966159, "grad_norm": 0.5508440640900468, "learning_rate": 9.554628366713506e-06, "loss": 0.5681, "step": 371 }, { "epoch": 0.1798404641044235, "grad_norm": 0.4958022642187558, "learning_rate": 9.551324510344972e-06, "loss": 0.5674, "step": 372 }, { "epoch": 0.18032390621223107, "grad_norm": 0.5211800045547449, "learning_rate": 9.548009020424172e-06, "loss": 0.5759, "step": 373 }, { "epoch": 0.18080734832003867, "grad_norm": 0.5234346072417955, "learning_rate": 9.544681905425767e-06, "loss": 0.5761, "step": 374 }, { "epoch": 0.18129079042784627, "grad_norm": 0.5277623761050696, "learning_rate": 9.541343173854128e-06, "loss": 0.5846, "step": 375 }, { "epoch": 0.18177423253565386, "grad_norm": 0.5159488960453931, "learning_rate": 9.537992834243323e-06, "loss": 0.5655, "step": 376 }, { "epoch": 0.18225767464346146, "grad_norm": 0.5036893425002033, "learning_rate": 9.53463089515709e-06, "loss": 0.578, "step": 377 }, { "epoch": 0.18274111675126903, "grad_norm": 0.5500694186101432, "learning_rate": 9.531257365188818e-06, "loss": 0.5683, "step": 378 }, { "epoch": 0.18322455885907662, "grad_norm": 0.4446123327167339, "learning_rate": 9.527872252961518e-06, "loss": 0.5112, "step": 379 }, { "epoch": 0.18370800096688422, "grad_norm": 0.49646226307611685, "learning_rate": 9.524475567127813e-06, "loss": 0.5799, "step": 380 }, { "epoch": 0.18419144307469182, "grad_norm": 0.5455620647014985, "learning_rate": 9.521067316369903e-06, "loss": 0.5601, "step": 381 }, { "epoch": 0.1846748851824994, "grad_norm": 0.5073331374598753, "learning_rate": 9.517647509399555e-06, "loss": 0.5399, "step": 382 }, { "epoch": 0.18515832729030698, "grad_norm": 0.5171824333562809, "learning_rate": 9.514216154958067e-06, "loss": 0.5754, "step": 383 }, { "epoch": 0.18564176939811458, "grad_norm": 0.5085818096253197, "learning_rate": 9.510773261816261e-06, "loss": 0.5623, "step": 384 }, { "epoch": 0.18612521150592218, "grad_norm": 0.50056273177622, "learning_rate": 9.507318838774448e-06, "loss": 0.5774, "step": 385 }, { "epoch": 0.18660865361372975, "grad_norm": 0.5493241761943409, "learning_rate": 9.50385289466241e-06, "loss": 0.5698, "step": 386 }, { "epoch": 0.18709209572153734, "grad_norm": 0.48083872272472233, "learning_rate": 9.500375438339384e-06, "loss": 0.5634, "step": 387 }, { "epoch": 0.18757553782934494, "grad_norm": 0.48598643847981954, "learning_rate": 9.496886478694025e-06, "loss": 0.5642, "step": 388 }, { "epoch": 0.18805897993715254, "grad_norm": 0.4945695421669264, "learning_rate": 9.493386024644396e-06, "loss": 0.5763, "step": 389 }, { "epoch": 0.1885424220449601, "grad_norm": 0.4630609260733735, "learning_rate": 9.48987408513794e-06, "loss": 0.5667, "step": 390 }, { "epoch": 0.1890258641527677, "grad_norm": 0.5178132025025237, "learning_rate": 9.486350669151455e-06, "loss": 0.5633, "step": 391 }, { "epoch": 0.1895093062605753, "grad_norm": 0.4855261545618926, "learning_rate": 9.482815785691082e-06, "loss": 0.5705, "step": 392 }, { "epoch": 0.1899927483683829, "grad_norm": 0.48580056178653924, "learning_rate": 9.47926944379226e-06, "loss": 0.5703, "step": 393 }, { "epoch": 0.19047619047619047, "grad_norm": 0.5308237684959329, "learning_rate": 9.475711652519732e-06, "loss": 0.5583, "step": 394 }, { "epoch": 0.19095963258399806, "grad_norm": 0.5127712618313278, "learning_rate": 9.472142420967496e-06, "loss": 0.5674, "step": 395 }, { "epoch": 0.19144307469180566, "grad_norm": 0.4833488281294125, "learning_rate": 9.468561758258795e-06, "loss": 0.578, "step": 396 }, { "epoch": 0.19192651679961326, "grad_norm": 0.5644191416840888, "learning_rate": 9.464969673546092e-06, "loss": 0.582, "step": 397 }, { "epoch": 0.19240995890742082, "grad_norm": 0.5561428050479044, "learning_rate": 9.461366176011047e-06, "loss": 0.5762, "step": 398 }, { "epoch": 0.19289340101522842, "grad_norm": 0.49800634280761286, "learning_rate": 9.457751274864486e-06, "loss": 0.5786, "step": 399 }, { "epoch": 0.19337684312303602, "grad_norm": 0.464098426014889, "learning_rate": 9.454124979346392e-06, "loss": 0.531, "step": 400 }, { "epoch": 0.19386028523084362, "grad_norm": 0.5317711530861378, "learning_rate": 9.450487298725866e-06, "loss": 0.5735, "step": 401 }, { "epoch": 0.19434372733865118, "grad_norm": 0.5700860255634325, "learning_rate": 9.446838242301113e-06, "loss": 0.5736, "step": 402 }, { "epoch": 0.19482716944645878, "grad_norm": 0.5415575586047788, "learning_rate": 9.443177819399416e-06, "loss": 0.5682, "step": 403 }, { "epoch": 0.19531061155426638, "grad_norm": 0.45162964809703743, "learning_rate": 9.439506039377111e-06, "loss": 0.5457, "step": 404 }, { "epoch": 0.19579405366207397, "grad_norm": 0.48073200361222107, "learning_rate": 9.435822911619564e-06, "loss": 0.5452, "step": 405 }, { "epoch": 0.19627749576988154, "grad_norm": 0.5218011226870963, "learning_rate": 9.432128445541147e-06, "loss": 0.5569, "step": 406 }, { "epoch": 0.19676093787768914, "grad_norm": 0.5241766492312198, "learning_rate": 9.42842265058521e-06, "loss": 0.5791, "step": 407 }, { "epoch": 0.19724437998549674, "grad_norm": 0.4747479232641684, "learning_rate": 9.424705536224065e-06, "loss": 0.572, "step": 408 }, { "epoch": 0.19772782209330433, "grad_norm": 0.4892195750767198, "learning_rate": 9.420977111958957e-06, "loss": 0.577, "step": 409 }, { "epoch": 0.1982112642011119, "grad_norm": 0.49625147154018395, "learning_rate": 9.41723738732004e-06, "loss": 0.5673, "step": 410 }, { "epoch": 0.1986947063089195, "grad_norm": 0.553969116933997, "learning_rate": 9.41348637186635e-06, "loss": 0.5805, "step": 411 }, { "epoch": 0.1991781484167271, "grad_norm": 0.5271833056864474, "learning_rate": 9.409724075185782e-06, "loss": 0.5811, "step": 412 }, { "epoch": 0.1996615905245347, "grad_norm": 0.541152410560869, "learning_rate": 9.405950506895074e-06, "loss": 0.5539, "step": 413 }, { "epoch": 0.2001450326323423, "grad_norm": 0.4827367980584999, "learning_rate": 9.40216567663977e-06, "loss": 0.5754, "step": 414 }, { "epoch": 0.20062847474014986, "grad_norm": 0.49177545628835745, "learning_rate": 9.398369594094198e-06, "loss": 0.508, "step": 415 }, { "epoch": 0.20111191684795746, "grad_norm": 0.50467312755319, "learning_rate": 9.394562268961454e-06, "loss": 0.5681, "step": 416 }, { "epoch": 0.20159535895576505, "grad_norm": 0.4916777572033636, "learning_rate": 9.390743710973366e-06, "loss": 0.575, "step": 417 }, { "epoch": 0.20207880106357265, "grad_norm": 0.5183550927798377, "learning_rate": 9.386913929890478e-06, "loss": 0.57, "step": 418 }, { "epoch": 0.20256224317138022, "grad_norm": 0.47362092706218123, "learning_rate": 9.383072935502018e-06, "loss": 0.5644, "step": 419 }, { "epoch": 0.20304568527918782, "grad_norm": 0.49530019201729136, "learning_rate": 9.379220737625877e-06, "loss": 0.564, "step": 420 }, { "epoch": 0.2035291273869954, "grad_norm": 0.5108751966700111, "learning_rate": 9.375357346108583e-06, "loss": 0.5602, "step": 421 }, { "epoch": 0.204012569494803, "grad_norm": 0.5230318233484302, "learning_rate": 9.371482770825277e-06, "loss": 0.5695, "step": 422 }, { "epoch": 0.20449601160261058, "grad_norm": 0.4802393361720882, "learning_rate": 9.367597021679686e-06, "loss": 0.5661, "step": 423 }, { "epoch": 0.20497945371041817, "grad_norm": 0.541773712373739, "learning_rate": 9.363700108604096e-06, "loss": 0.5582, "step": 424 }, { "epoch": 0.20546289581822577, "grad_norm": 0.5110126727655455, "learning_rate": 9.359792041559334e-06, "loss": 0.5645, "step": 425 }, { "epoch": 0.20594633792603337, "grad_norm": 0.5486480496411716, "learning_rate": 9.35587283053473e-06, "loss": 0.5677, "step": 426 }, { "epoch": 0.20642978003384094, "grad_norm": 0.5379779057549923, "learning_rate": 9.351942485548109e-06, "loss": 0.5435, "step": 427 }, { "epoch": 0.20691322214164853, "grad_norm": 0.5341397558862222, "learning_rate": 9.348001016645744e-06, "loss": 0.5599, "step": 428 }, { "epoch": 0.20739666424945613, "grad_norm": 0.44238086682442823, "learning_rate": 9.344048433902351e-06, "loss": 0.541, "step": 429 }, { "epoch": 0.20788010635726373, "grad_norm": 0.5213851954927032, "learning_rate": 9.340084747421048e-06, "loss": 0.5366, "step": 430 }, { "epoch": 0.2083635484650713, "grad_norm": 0.5349032988779688, "learning_rate": 9.336109967333337e-06, "loss": 0.5571, "step": 431 }, { "epoch": 0.2088469905728789, "grad_norm": 0.4554230771685569, "learning_rate": 9.332124103799075e-06, "loss": 0.5516, "step": 432 }, { "epoch": 0.2093304326806865, "grad_norm": 0.5021585721937876, "learning_rate": 9.328127167006457e-06, "loss": 0.5679, "step": 433 }, { "epoch": 0.2098138747884941, "grad_norm": 0.5025134126056662, "learning_rate": 9.324119167171967e-06, "loss": 0.5659, "step": 434 }, { "epoch": 0.21029731689630166, "grad_norm": 0.48977518403096176, "learning_rate": 9.320100114540382e-06, "loss": 0.5753, "step": 435 }, { "epoch": 0.21078075900410925, "grad_norm": 0.4789181842167065, "learning_rate": 9.316070019384722e-06, "loss": 0.558, "step": 436 }, { "epoch": 0.21126420111191685, "grad_norm": 0.48417362744631853, "learning_rate": 9.312028892006233e-06, "loss": 0.5637, "step": 437 }, { "epoch": 0.21174764321972445, "grad_norm": 0.5040441298097904, "learning_rate": 9.307976742734366e-06, "loss": 0.5603, "step": 438 }, { "epoch": 0.21223108532753202, "grad_norm": 0.5003182083782678, "learning_rate": 9.30391358192674e-06, "loss": 0.5583, "step": 439 }, { "epoch": 0.2127145274353396, "grad_norm": 0.5188458903874932, "learning_rate": 9.299839419969119e-06, "loss": 0.5614, "step": 440 }, { "epoch": 0.2131979695431472, "grad_norm": 0.4990120996823676, "learning_rate": 9.295754267275393e-06, "loss": 0.5732, "step": 441 }, { "epoch": 0.2136814116509548, "grad_norm": 0.43407580533296863, "learning_rate": 9.291658134287537e-06, "loss": 0.5451, "step": 442 }, { "epoch": 0.21416485375876237, "grad_norm": 0.487299832131986, "learning_rate": 9.287551031475604e-06, "loss": 0.5486, "step": 443 }, { "epoch": 0.21464829586656997, "grad_norm": 0.4748601209022523, "learning_rate": 9.283432969337672e-06, "loss": 0.5568, "step": 444 }, { "epoch": 0.21513173797437757, "grad_norm": 0.5116954397180901, "learning_rate": 9.279303958399846e-06, "loss": 0.5561, "step": 445 }, { "epoch": 0.21561518008218516, "grad_norm": 0.5103832796562369, "learning_rate": 9.275164009216205e-06, "loss": 0.5653, "step": 446 }, { "epoch": 0.21609862218999276, "grad_norm": 0.453674255766726, "learning_rate": 9.271013132368799e-06, "loss": 0.5359, "step": 447 }, { "epoch": 0.21658206429780033, "grad_norm": 0.4865827031825044, "learning_rate": 9.266851338467598e-06, "loss": 0.5627, "step": 448 }, { "epoch": 0.21706550640560793, "grad_norm": 0.4474998958247519, "learning_rate": 9.262678638150486e-06, "loss": 0.5372, "step": 449 }, { "epoch": 0.21754894851341552, "grad_norm": 0.5312817145455567, "learning_rate": 9.258495042083222e-06, "loss": 0.583, "step": 450 }, { "epoch": 0.21803239062122312, "grad_norm": 0.5326646088756841, "learning_rate": 9.254300560959413e-06, "loss": 0.5641, "step": 451 }, { "epoch": 0.2185158327290307, "grad_norm": 0.47741110714076435, "learning_rate": 9.25009520550049e-06, "loss": 0.5692, "step": 452 }, { "epoch": 0.2189992748368383, "grad_norm": 0.4992778758439529, "learning_rate": 9.245878986455684e-06, "loss": 0.5732, "step": 453 }, { "epoch": 0.21948271694464588, "grad_norm": 0.5067531688765293, "learning_rate": 9.241651914601986e-06, "loss": 0.5684, "step": 454 }, { "epoch": 0.21996615905245348, "grad_norm": 0.5259329600281596, "learning_rate": 9.237414000744134e-06, "loss": 0.5728, "step": 455 }, { "epoch": 0.22044960116026105, "grad_norm": 0.4912112930780334, "learning_rate": 9.23316525571458e-06, "loss": 0.5543, "step": 456 }, { "epoch": 0.22093304326806865, "grad_norm": 0.4325116439857764, "learning_rate": 9.228905690373456e-06, "loss": 0.5109, "step": 457 }, { "epoch": 0.22141648537587624, "grad_norm": 0.5251969417490432, "learning_rate": 9.224635315608554e-06, "loss": 0.5613, "step": 458 }, { "epoch": 0.22189992748368384, "grad_norm": 0.5371164613513753, "learning_rate": 9.2203541423353e-06, "loss": 0.5758, "step": 459 }, { "epoch": 0.2223833695914914, "grad_norm": 0.49879877094748626, "learning_rate": 9.216062181496712e-06, "loss": 0.5656, "step": 460 }, { "epoch": 0.222866811699299, "grad_norm": 0.4666739038962981, "learning_rate": 9.211759444063392e-06, "loss": 0.5643, "step": 461 }, { "epoch": 0.2233502538071066, "grad_norm": 0.5019702713381807, "learning_rate": 9.207445941033483e-06, "loss": 0.5645, "step": 462 }, { "epoch": 0.2238336959149142, "grad_norm": 0.560484985437826, "learning_rate": 9.203121683432646e-06, "loss": 0.5622, "step": 463 }, { "epoch": 0.22431713802272177, "grad_norm": 0.501701537299382, "learning_rate": 9.19878668231403e-06, "loss": 0.5686, "step": 464 }, { "epoch": 0.22480058013052936, "grad_norm": 0.48640275847390047, "learning_rate": 9.19444094875825e-06, "loss": 0.5617, "step": 465 }, { "epoch": 0.22528402223833696, "grad_norm": 0.5066662929437282, "learning_rate": 9.190084493873353e-06, "loss": 0.5733, "step": 466 }, { "epoch": 0.22576746434614456, "grad_norm": 0.5297511031777309, "learning_rate": 9.185717328794784e-06, "loss": 0.5632, "step": 467 }, { "epoch": 0.22625090645395213, "grad_norm": 0.5778692323663056, "learning_rate": 9.18133946468537e-06, "loss": 0.5684, "step": 468 }, { "epoch": 0.22673434856175972, "grad_norm": 0.5148715492097395, "learning_rate": 9.176950912735287e-06, "loss": 0.5559, "step": 469 }, { "epoch": 0.22721779066956732, "grad_norm": 0.5157447753884506, "learning_rate": 9.172551684162025e-06, "loss": 0.5731, "step": 470 }, { "epoch": 0.22770123277737492, "grad_norm": 0.4783319000473412, "learning_rate": 9.16814179021037e-06, "loss": 0.5671, "step": 471 }, { "epoch": 0.2281846748851825, "grad_norm": 0.5017422895280137, "learning_rate": 9.163721242152362e-06, "loss": 0.5661, "step": 472 }, { "epoch": 0.22866811699299008, "grad_norm": 0.49272124386072536, "learning_rate": 9.159290051287282e-06, "loss": 0.5627, "step": 473 }, { "epoch": 0.22915155910079768, "grad_norm": 0.47471736533769476, "learning_rate": 9.154848228941607e-06, "loss": 0.5615, "step": 474 }, { "epoch": 0.22963500120860528, "grad_norm": 0.5071884927272643, "learning_rate": 9.150395786468998e-06, "loss": 0.5645, "step": 475 }, { "epoch": 0.23011844331641285, "grad_norm": 0.48690399925776484, "learning_rate": 9.14593273525025e-06, "loss": 0.5647, "step": 476 }, { "epoch": 0.23060188542422044, "grad_norm": 0.5041235784595942, "learning_rate": 9.14145908669329e-06, "loss": 0.5729, "step": 477 }, { "epoch": 0.23108532753202804, "grad_norm": 0.5265161224054821, "learning_rate": 9.136974852233118e-06, "loss": 0.5587, "step": 478 }, { "epoch": 0.23156876963983564, "grad_norm": 0.4778337324840926, "learning_rate": 9.132480043331801e-06, "loss": 0.5646, "step": 479 }, { "epoch": 0.23205221174764323, "grad_norm": 0.5036800160533508, "learning_rate": 9.127974671478432e-06, "loss": 0.5655, "step": 480 }, { "epoch": 0.2325356538554508, "grad_norm": 0.4915164507750186, "learning_rate": 9.123458748189105e-06, "loss": 0.5608, "step": 481 }, { "epoch": 0.2330190959632584, "grad_norm": 0.4447947403953834, "learning_rate": 9.118932285006886e-06, "loss": 0.5254, "step": 482 }, { "epoch": 0.233502538071066, "grad_norm": 0.4936810479165672, "learning_rate": 9.114395293501775e-06, "loss": 0.5751, "step": 483 }, { "epoch": 0.2339859801788736, "grad_norm": 0.4933009245810686, "learning_rate": 9.10984778527069e-06, "loss": 0.5603, "step": 484 }, { "epoch": 0.23446942228668116, "grad_norm": 0.4720549987110232, "learning_rate": 9.10528977193743e-06, "loss": 0.5703, "step": 485 }, { "epoch": 0.23495286439448876, "grad_norm": 0.5362136689894559, "learning_rate": 9.100721265152644e-06, "loss": 0.5635, "step": 486 }, { "epoch": 0.23543630650229636, "grad_norm": 0.47602005538977166, "learning_rate": 9.096142276593802e-06, "loss": 0.5721, "step": 487 }, { "epoch": 0.23591974861010395, "grad_norm": 0.48887012727323886, "learning_rate": 9.09155281796517e-06, "loss": 0.5502, "step": 488 }, { "epoch": 0.23640319071791152, "grad_norm": 0.5468866437635687, "learning_rate": 9.086952900997774e-06, "loss": 0.5628, "step": 489 }, { "epoch": 0.23688663282571912, "grad_norm": 0.468285091758703, "learning_rate": 9.082342537449369e-06, "loss": 0.5649, "step": 490 }, { "epoch": 0.23737007493352671, "grad_norm": 0.49449575173177474, "learning_rate": 9.07772173910442e-06, "loss": 0.5363, "step": 491 }, { "epoch": 0.2378535170413343, "grad_norm": 0.5665277859908898, "learning_rate": 9.073090517774057e-06, "loss": 0.5679, "step": 492 }, { "epoch": 0.23833695914914188, "grad_norm": 0.559218042712036, "learning_rate": 9.068448885296057e-06, "loss": 0.5598, "step": 493 }, { "epoch": 0.23882040125694948, "grad_norm": 0.5572180299965971, "learning_rate": 9.063796853534808e-06, "loss": 0.5606, "step": 494 }, { "epoch": 0.23930384336475707, "grad_norm": 0.4852501650353095, "learning_rate": 9.059134434381274e-06, "loss": 0.5614, "step": 495 }, { "epoch": 0.23978728547256467, "grad_norm": 0.5235782249928449, "learning_rate": 9.054461639752976e-06, "loss": 0.5637, "step": 496 }, { "epoch": 0.24027072758037224, "grad_norm": 0.5028533022976227, "learning_rate": 9.049778481593954e-06, "loss": 0.5718, "step": 497 }, { "epoch": 0.24075416968817984, "grad_norm": 0.508045864936268, "learning_rate": 9.045084971874738e-06, "loss": 0.5651, "step": 498 }, { "epoch": 0.24123761179598743, "grad_norm": 0.5575870011120908, "learning_rate": 9.040381122592317e-06, "loss": 0.565, "step": 499 }, { "epoch": 0.24172105390379503, "grad_norm": 0.5201685839473924, "learning_rate": 9.035666945770107e-06, "loss": 0.5593, "step": 500 }, { "epoch": 0.2422044960116026, "grad_norm": 0.48179233555943923, "learning_rate": 9.030942453457928e-06, "loss": 0.5199, "step": 501 }, { "epoch": 0.2426879381194102, "grad_norm": 0.4867208952029737, "learning_rate": 9.02620765773196e-06, "loss": 0.5548, "step": 502 }, { "epoch": 0.2431713802272178, "grad_norm": 0.5240394440690106, "learning_rate": 9.02146257069472e-06, "loss": 0.5611, "step": 503 }, { "epoch": 0.2436548223350254, "grad_norm": 0.48307750050965703, "learning_rate": 9.01670720447504e-06, "loss": 0.5577, "step": 504 }, { "epoch": 0.24413826444283296, "grad_norm": 0.5034030614527921, "learning_rate": 9.011941571228015e-06, "loss": 0.5608, "step": 505 }, { "epoch": 0.24462170655064056, "grad_norm": 0.46379490536223517, "learning_rate": 9.007165683134986e-06, "loss": 0.5315, "step": 506 }, { "epoch": 0.24510514865844815, "grad_norm": 0.5103811282689319, "learning_rate": 9.00237955240351e-06, "loss": 0.5613, "step": 507 }, { "epoch": 0.24558859076625575, "grad_norm": 0.47564392120255755, "learning_rate": 8.997583191267326e-06, "loss": 0.5764, "step": 508 }, { "epoch": 0.24607203287406332, "grad_norm": 0.4811799201923712, "learning_rate": 8.992776611986313e-06, "loss": 0.5704, "step": 509 }, { "epoch": 0.24655547498187091, "grad_norm": 0.4799439081762819, "learning_rate": 8.987959826846479e-06, "loss": 0.5573, "step": 510 }, { "epoch": 0.2470389170896785, "grad_norm": 0.5219349618857427, "learning_rate": 8.983132848159916e-06, "loss": 0.5583, "step": 511 }, { "epoch": 0.2475223591974861, "grad_norm": 0.5010818591918965, "learning_rate": 8.978295688264768e-06, "loss": 0.5699, "step": 512 }, { "epoch": 0.2480058013052937, "grad_norm": 0.5282819201955711, "learning_rate": 8.973448359525207e-06, "loss": 0.5641, "step": 513 }, { "epoch": 0.24848924341310127, "grad_norm": 0.5025819972323563, "learning_rate": 8.968590874331395e-06, "loss": 0.5649, "step": 514 }, { "epoch": 0.24897268552090887, "grad_norm": 0.4880024154213522, "learning_rate": 8.963723245099456e-06, "loss": 0.5533, "step": 515 }, { "epoch": 0.24945612762871647, "grad_norm": 0.4844265343558768, "learning_rate": 8.958845484271443e-06, "loss": 0.5571, "step": 516 }, { "epoch": 0.24993956973652406, "grad_norm": 0.4918270286134992, "learning_rate": 8.953957604315306e-06, "loss": 0.5612, "step": 517 }, { "epoch": 0.25042301184433163, "grad_norm": 0.4532098318099568, "learning_rate": 8.949059617724859e-06, "loss": 0.5532, "step": 518 }, { "epoch": 0.25090645395213923, "grad_norm": 0.4784777680132966, "learning_rate": 8.944151537019752e-06, "loss": 0.5314, "step": 519 }, { "epoch": 0.2513898960599468, "grad_norm": 0.49834032614411844, "learning_rate": 8.939233374745432e-06, "loss": 0.561, "step": 520 }, { "epoch": 0.2518733381677544, "grad_norm": 0.43922831313439964, "learning_rate": 8.934305143473123e-06, "loss": 0.5229, "step": 521 }, { "epoch": 0.252356780275562, "grad_norm": 0.4774051999235377, "learning_rate": 8.929366855799777e-06, "loss": 0.5584, "step": 522 }, { "epoch": 0.2528402223833696, "grad_norm": 0.4860585540987837, "learning_rate": 8.924418524348058e-06, "loss": 0.5722, "step": 523 }, { "epoch": 0.25332366449117716, "grad_norm": 0.476115105724116, "learning_rate": 8.919460161766299e-06, "loss": 0.5527, "step": 524 }, { "epoch": 0.25380710659898476, "grad_norm": 0.49670836036646415, "learning_rate": 8.914491780728471e-06, "loss": 0.565, "step": 525 }, { "epoch": 0.25429054870679235, "grad_norm": 0.49705890206049747, "learning_rate": 8.909513393934162e-06, "loss": 0.5562, "step": 526 }, { "epoch": 0.25477399081459995, "grad_norm": 0.5118474736649574, "learning_rate": 8.904525014108529e-06, "loss": 0.5536, "step": 527 }, { "epoch": 0.25525743292240755, "grad_norm": 0.5301718242423505, "learning_rate": 8.899526654002268e-06, "loss": 0.5612, "step": 528 }, { "epoch": 0.25574087503021514, "grad_norm": 0.4796891269551852, "learning_rate": 8.894518326391595e-06, "loss": 0.5578, "step": 529 }, { "epoch": 0.25622431713802274, "grad_norm": 0.4825310469483714, "learning_rate": 8.889500044078199e-06, "loss": 0.5554, "step": 530 }, { "epoch": 0.25670775924583034, "grad_norm": 0.474416307358851, "learning_rate": 8.88447181988921e-06, "loss": 0.5466, "step": 531 }, { "epoch": 0.2571912013536379, "grad_norm": 0.4754427571901456, "learning_rate": 8.87943366667718e-06, "loss": 0.5232, "step": 532 }, { "epoch": 0.2576746434614455, "grad_norm": 0.5283380707149146, "learning_rate": 8.87438559732003e-06, "loss": 0.5575, "step": 533 }, { "epoch": 0.25815808556925307, "grad_norm": 0.49022668890084664, "learning_rate": 8.869327624721033e-06, "loss": 0.5584, "step": 534 }, { "epoch": 0.25864152767706067, "grad_norm": 0.45329648879294543, "learning_rate": 8.864259761808778e-06, "loss": 0.5557, "step": 535 }, { "epoch": 0.25912496978486826, "grad_norm": 0.5150923796193744, "learning_rate": 8.859182021537126e-06, "loss": 0.5672, "step": 536 }, { "epoch": 0.25960841189267586, "grad_norm": 0.49475203737919254, "learning_rate": 8.854094416885192e-06, "loss": 0.5513, "step": 537 }, { "epoch": 0.26009185400048346, "grad_norm": 0.48640723658571816, "learning_rate": 8.848996960857308e-06, "loss": 0.5542, "step": 538 }, { "epoch": 0.26057529610829105, "grad_norm": 0.5011403090647114, "learning_rate": 8.843889666482977e-06, "loss": 0.5503, "step": 539 }, { "epoch": 0.2610587382160986, "grad_norm": 0.45868293065964316, "learning_rate": 8.838772546816857e-06, "loss": 0.5245, "step": 540 }, { "epoch": 0.2615421803239062, "grad_norm": 0.5028670832415251, "learning_rate": 8.833645614938716e-06, "loss": 0.563, "step": 541 }, { "epoch": 0.2620256224317138, "grad_norm": 0.4652958998559184, "learning_rate": 8.82850888395341e-06, "loss": 0.5214, "step": 542 }, { "epoch": 0.2625090645395214, "grad_norm": 0.48814680090193757, "learning_rate": 8.823362366990833e-06, "loss": 0.5539, "step": 543 }, { "epoch": 0.262992506647329, "grad_norm": 0.4686742850265713, "learning_rate": 8.818206077205899e-06, "loss": 0.5432, "step": 544 }, { "epoch": 0.2634759487551366, "grad_norm": 0.44177435010013455, "learning_rate": 8.8130400277785e-06, "loss": 0.5432, "step": 545 }, { "epoch": 0.2639593908629442, "grad_norm": 0.48745119596264225, "learning_rate": 8.807864231913475e-06, "loss": 0.5609, "step": 546 }, { "epoch": 0.2644428329707518, "grad_norm": 0.5387031701921053, "learning_rate": 8.802678702840575e-06, "loss": 0.5608, "step": 547 }, { "epoch": 0.26492627507855937, "grad_norm": 0.47706550642594997, "learning_rate": 8.79748345381443e-06, "loss": 0.5487, "step": 548 }, { "epoch": 0.2654097171863669, "grad_norm": 0.4694250929319588, "learning_rate": 8.792278498114517e-06, "loss": 0.549, "step": 549 }, { "epoch": 0.2658931592941745, "grad_norm": 0.4937111232536657, "learning_rate": 8.78706384904512e-06, "loss": 0.5564, "step": 550 }, { "epoch": 0.2663766014019821, "grad_norm": 0.468357648344204, "learning_rate": 8.7818395199353e-06, "loss": 0.5546, "step": 551 }, { "epoch": 0.2668600435097897, "grad_norm": 0.4713572915315673, "learning_rate": 8.77660552413887e-06, "loss": 0.5512, "step": 552 }, { "epoch": 0.2673434856175973, "grad_norm": 0.4740909938486332, "learning_rate": 8.77136187503434e-06, "loss": 0.5631, "step": 553 }, { "epoch": 0.2678269277254049, "grad_norm": 0.4472174307551216, "learning_rate": 8.766108586024904e-06, "loss": 0.5222, "step": 554 }, { "epoch": 0.2683103698332125, "grad_norm": 0.4548110236983466, "learning_rate": 8.760845670538387e-06, "loss": 0.5485, "step": 555 }, { "epoch": 0.2687938119410201, "grad_norm": 0.5173119662805489, "learning_rate": 8.755573142027228e-06, "loss": 0.5624, "step": 556 }, { "epoch": 0.26927725404882763, "grad_norm": 0.4812632123799694, "learning_rate": 8.750291013968432e-06, "loss": 0.5562, "step": 557 }, { "epoch": 0.2697606961566352, "grad_norm": 0.472663174890125, "learning_rate": 8.744999299863549e-06, "loss": 0.5669, "step": 558 }, { "epoch": 0.2702441382644428, "grad_norm": 0.463122081686998, "learning_rate": 8.739698013238625e-06, "loss": 0.557, "step": 559 }, { "epoch": 0.2707275803722504, "grad_norm": 0.5188284707009508, "learning_rate": 8.734387167644171e-06, "loss": 0.5202, "step": 560 }, { "epoch": 0.271211022480058, "grad_norm": 0.49659334079030504, "learning_rate": 8.729066776655144e-06, "loss": 0.5605, "step": 561 }, { "epoch": 0.2716944645878656, "grad_norm": 0.4433765304016552, "learning_rate": 8.723736853870888e-06, "loss": 0.5193, "step": 562 }, { "epoch": 0.2721779066956732, "grad_norm": 0.46285084832800716, "learning_rate": 8.718397412915114e-06, "loss": 0.5583, "step": 563 }, { "epoch": 0.2726613488034808, "grad_norm": 0.4406166472711255, "learning_rate": 8.713048467435865e-06, "loss": 0.5365, "step": 564 }, { "epoch": 0.27314479091128835, "grad_norm": 0.49591339367367465, "learning_rate": 8.707690031105478e-06, "loss": 0.5638, "step": 565 }, { "epoch": 0.27362823301909595, "grad_norm": 0.47717175741546425, "learning_rate": 8.702322117620547e-06, "loss": 0.5375, "step": 566 }, { "epoch": 0.27411167512690354, "grad_norm": 0.49399204569955096, "learning_rate": 8.696944740701891e-06, "loss": 0.5502, "step": 567 }, { "epoch": 0.27459511723471114, "grad_norm": 0.462084403002843, "learning_rate": 8.69155791409452e-06, "loss": 0.549, "step": 568 }, { "epoch": 0.27507855934251874, "grad_norm": 0.4733870628371529, "learning_rate": 8.686161651567596e-06, "loss": 0.5479, "step": 569 }, { "epoch": 0.27556200145032633, "grad_norm": 0.4586305030542931, "learning_rate": 8.6807559669144e-06, "loss": 0.517, "step": 570 }, { "epoch": 0.27604544355813393, "grad_norm": 0.4786603573138254, "learning_rate": 8.6753408739523e-06, "loss": 0.5449, "step": 571 }, { "epoch": 0.2765288856659415, "grad_norm": 0.4923356872720239, "learning_rate": 8.669916386522708e-06, "loss": 0.5516, "step": 572 }, { "epoch": 0.27701232777374907, "grad_norm": 0.47497918747290174, "learning_rate": 8.664482518491053e-06, "loss": 0.5527, "step": 573 }, { "epoch": 0.27749576988155666, "grad_norm": 0.5463551243922615, "learning_rate": 8.659039283746738e-06, "loss": 0.5528, "step": 574 }, { "epoch": 0.27797921198936426, "grad_norm": 0.5125817786426824, "learning_rate": 8.653586696203111e-06, "loss": 0.5428, "step": 575 }, { "epoch": 0.27846265409717186, "grad_norm": 0.46930359618316736, "learning_rate": 8.648124769797424e-06, "loss": 0.5566, "step": 576 }, { "epoch": 0.27894609620497945, "grad_norm": 0.48871061545968875, "learning_rate": 8.6426535184908e-06, "loss": 0.5517, "step": 577 }, { "epoch": 0.27942953831278705, "grad_norm": 0.49517469382405177, "learning_rate": 8.637172956268203e-06, "loss": 0.5537, "step": 578 }, { "epoch": 0.27991298042059465, "grad_norm": 0.46885534036424203, "learning_rate": 8.631683097138386e-06, "loss": 0.5455, "step": 579 }, { "epoch": 0.28039642252840224, "grad_norm": 0.4404595141316285, "learning_rate": 8.626183955133876e-06, "loss": 0.5216, "step": 580 }, { "epoch": 0.2808798646362098, "grad_norm": 0.45805738086780906, "learning_rate": 8.620675544310921e-06, "loss": 0.5483, "step": 581 }, { "epoch": 0.2813633067440174, "grad_norm": 0.43283074014496, "learning_rate": 8.615157878749462e-06, "loss": 0.546, "step": 582 }, { "epoch": 0.281846748851825, "grad_norm": 0.45714306807295113, "learning_rate": 8.609630972553098e-06, "loss": 0.5521, "step": 583 }, { "epoch": 0.2823301909596326, "grad_norm": 0.47311376331001226, "learning_rate": 8.604094839849047e-06, "loss": 0.5586, "step": 584 }, { "epoch": 0.2828136330674402, "grad_norm": 0.4424955765808361, "learning_rate": 8.598549494788111e-06, "loss": 0.5384, "step": 585 }, { "epoch": 0.28329707517524777, "grad_norm": 0.467505480407099, "learning_rate": 8.592994951544637e-06, "loss": 0.5368, "step": 586 }, { "epoch": 0.28378051728305537, "grad_norm": 0.48553503600686004, "learning_rate": 8.587431224316488e-06, "loss": 0.5475, "step": 587 }, { "epoch": 0.28426395939086296, "grad_norm": 0.46529868946828945, "learning_rate": 8.581858327324996e-06, "loss": 0.5212, "step": 588 }, { "epoch": 0.28474740149867056, "grad_norm": 0.4898248932325677, "learning_rate": 8.576276274814936e-06, "loss": 0.553, "step": 589 }, { "epoch": 0.2852308436064781, "grad_norm": 0.4455201034159363, "learning_rate": 8.570685081054487e-06, "loss": 0.5216, "step": 590 }, { "epoch": 0.2857142857142857, "grad_norm": 0.46516198660507346, "learning_rate": 8.565084760335188e-06, "loss": 0.5505, "step": 591 }, { "epoch": 0.2861977278220933, "grad_norm": 0.5006509760317717, "learning_rate": 8.559475326971907e-06, "loss": 0.551, "step": 592 }, { "epoch": 0.2866811699299009, "grad_norm": 0.4994980078510237, "learning_rate": 8.553856795302815e-06, "loss": 0.5421, "step": 593 }, { "epoch": 0.2871646120377085, "grad_norm": 0.5029150812228765, "learning_rate": 8.548229179689325e-06, "loss": 0.5519, "step": 594 }, { "epoch": 0.2876480541455161, "grad_norm": 0.5073270485472724, "learning_rate": 8.54259249451608e-06, "loss": 0.5537, "step": 595 }, { "epoch": 0.2881314962533237, "grad_norm": 0.45709172284548705, "learning_rate": 8.536946754190903e-06, "loss": 0.564, "step": 596 }, { "epoch": 0.2886149383611313, "grad_norm": 0.47174309410425874, "learning_rate": 8.531291973144755e-06, "loss": 0.5452, "step": 597 }, { "epoch": 0.2890983804689388, "grad_norm": 0.46639713589843634, "learning_rate": 8.52562816583172e-06, "loss": 0.5509, "step": 598 }, { "epoch": 0.2895818225767464, "grad_norm": 0.4508036851803557, "learning_rate": 8.519955346728939e-06, "loss": 0.5428, "step": 599 }, { "epoch": 0.290065264684554, "grad_norm": 0.44468353218524803, "learning_rate": 8.5142735303366e-06, "loss": 0.5205, "step": 600 }, { "epoch": 0.2905487067923616, "grad_norm": 0.4850164433619974, "learning_rate": 8.50858273117788e-06, "loss": 0.5476, "step": 601 }, { "epoch": 0.2910321489001692, "grad_norm": 0.49303870805000655, "learning_rate": 8.502882963798923e-06, "loss": 0.545, "step": 602 }, { "epoch": 0.2915155910079768, "grad_norm": 0.47572858582093197, "learning_rate": 8.497174242768792e-06, "loss": 0.5515, "step": 603 }, { "epoch": 0.2919990331157844, "grad_norm": 0.5284607359345597, "learning_rate": 8.49145658267944e-06, "loss": 0.5453, "step": 604 }, { "epoch": 0.292482475223592, "grad_norm": 0.47829654266425203, "learning_rate": 8.485729998145665e-06, "loss": 0.5452, "step": 605 }, { "epoch": 0.29296591733139954, "grad_norm": 0.4503645291799449, "learning_rate": 8.479994503805079e-06, "loss": 0.5536, "step": 606 }, { "epoch": 0.29344935943920714, "grad_norm": 0.4693738299713831, "learning_rate": 8.474250114318066e-06, "loss": 0.5216, "step": 607 }, { "epoch": 0.29393280154701473, "grad_norm": 0.4988674830387375, "learning_rate": 8.468496844367752e-06, "loss": 0.5582, "step": 608 }, { "epoch": 0.29441624365482233, "grad_norm": 0.47627140431869974, "learning_rate": 8.462734708659959e-06, "loss": 0.5511, "step": 609 }, { "epoch": 0.2948996857626299, "grad_norm": 0.43233992742433075, "learning_rate": 8.456963721923166e-06, "loss": 0.5279, "step": 610 }, { "epoch": 0.2953831278704375, "grad_norm": 0.4908070311501362, "learning_rate": 8.451183898908484e-06, "loss": 0.5546, "step": 611 }, { "epoch": 0.2958665699782451, "grad_norm": 0.4519643584485447, "learning_rate": 8.445395254389605e-06, "loss": 0.5221, "step": 612 }, { "epoch": 0.2963500120860527, "grad_norm": 0.48396713453490725, "learning_rate": 8.439597803162773e-06, "loss": 0.5489, "step": 613 }, { "epoch": 0.29683345419386026, "grad_norm": 0.4611763742603572, "learning_rate": 8.433791560046737e-06, "loss": 0.5457, "step": 614 }, { "epoch": 0.29731689630166785, "grad_norm": 0.472544396347692, "learning_rate": 8.427976539882725e-06, "loss": 0.5553, "step": 615 }, { "epoch": 0.29780033840947545, "grad_norm": 0.5058827141310254, "learning_rate": 8.422152757534395e-06, "loss": 0.5435, "step": 616 }, { "epoch": 0.29828378051728305, "grad_norm": 0.4766589825937423, "learning_rate": 8.416320227887805e-06, "loss": 0.5526, "step": 617 }, { "epoch": 0.29876722262509064, "grad_norm": 0.47223702801719897, "learning_rate": 8.410478965851371e-06, "loss": 0.5542, "step": 618 }, { "epoch": 0.29925066473289824, "grad_norm": 0.4819039683875086, "learning_rate": 8.404628986355832e-06, "loss": 0.5546, "step": 619 }, { "epoch": 0.29973410684070584, "grad_norm": 0.49462386708237827, "learning_rate": 8.398770304354203e-06, "loss": 0.5566, "step": 620 }, { "epoch": 0.30021754894851344, "grad_norm": 0.4977376021667819, "learning_rate": 8.39290293482175e-06, "loss": 0.5508, "step": 621 }, { "epoch": 0.30070099105632103, "grad_norm": 0.506465713525892, "learning_rate": 8.387026892755942e-06, "loss": 0.5568, "step": 622 }, { "epoch": 0.3011844331641286, "grad_norm": 0.4712688161265267, "learning_rate": 8.381142193176414e-06, "loss": 0.5489, "step": 623 }, { "epoch": 0.30166787527193617, "grad_norm": 0.4615215470431895, "learning_rate": 8.375248851124937e-06, "loss": 0.5554, "step": 624 }, { "epoch": 0.30215131737974377, "grad_norm": 0.5110895222198079, "learning_rate": 8.369346881665364e-06, "loss": 0.5466, "step": 625 }, { "epoch": 0.30263475948755136, "grad_norm": 0.47157470051165545, "learning_rate": 8.363436299883604e-06, "loss": 0.5644, "step": 626 }, { "epoch": 0.30311820159535896, "grad_norm": 0.4789841431133952, "learning_rate": 8.357517120887586e-06, "loss": 0.5493, "step": 627 }, { "epoch": 0.30360164370316656, "grad_norm": 0.47629409809645545, "learning_rate": 8.351589359807204e-06, "loss": 0.5523, "step": 628 }, { "epoch": 0.30408508581097415, "grad_norm": 0.4618925314784255, "learning_rate": 8.345653031794292e-06, "loss": 0.5348, "step": 629 }, { "epoch": 0.30456852791878175, "grad_norm": 0.500646417496574, "learning_rate": 8.339708152022586e-06, "loss": 0.554, "step": 630 }, { "epoch": 0.3050519700265893, "grad_norm": 0.4553680483630167, "learning_rate": 8.333754735687677e-06, "loss": 0.5489, "step": 631 }, { "epoch": 0.3055354121343969, "grad_norm": 0.4591000336987377, "learning_rate": 8.327792798006977e-06, "loss": 0.5508, "step": 632 }, { "epoch": 0.3060188542422045, "grad_norm": 0.48696012413599493, "learning_rate": 8.321822354219677e-06, "loss": 0.5505, "step": 633 }, { "epoch": 0.3065022963500121, "grad_norm": 0.4952413093498077, "learning_rate": 8.315843419586717e-06, "loss": 0.5574, "step": 634 }, { "epoch": 0.3069857384578197, "grad_norm": 0.4716786308005616, "learning_rate": 8.309856009390732e-06, "loss": 0.5281, "step": 635 }, { "epoch": 0.3074691805656273, "grad_norm": 0.5207738583309734, "learning_rate": 8.303860138936027e-06, "loss": 0.5607, "step": 636 }, { "epoch": 0.3079526226734349, "grad_norm": 0.5226978234399785, "learning_rate": 8.297855823548528e-06, "loss": 0.5565, "step": 637 }, { "epoch": 0.30843606478124247, "grad_norm": 0.49251100209183046, "learning_rate": 8.291843078575752e-06, "loss": 0.5485, "step": 638 }, { "epoch": 0.30891950688905, "grad_norm": 0.4769824051475033, "learning_rate": 8.285821919386758e-06, "loss": 0.5456, "step": 639 }, { "epoch": 0.3094029489968576, "grad_norm": 0.503019530780954, "learning_rate": 8.279792361372114e-06, "loss": 0.5602, "step": 640 }, { "epoch": 0.3098863911046652, "grad_norm": 0.48405162661408385, "learning_rate": 8.273754419943856e-06, "loss": 0.5536, "step": 641 }, { "epoch": 0.3103698332124728, "grad_norm": 0.4657304337869963, "learning_rate": 8.267708110535449e-06, "loss": 0.5477, "step": 642 }, { "epoch": 0.3108532753202804, "grad_norm": 0.5106373655355231, "learning_rate": 8.26165344860175e-06, "loss": 0.571, "step": 643 }, { "epoch": 0.311336717428088, "grad_norm": 0.4854760780132044, "learning_rate": 8.255590449618958e-06, "loss": 0.546, "step": 644 }, { "epoch": 0.3118201595358956, "grad_norm": 0.4817908473273075, "learning_rate": 8.24951912908459e-06, "loss": 0.5446, "step": 645 }, { "epoch": 0.3123036016437032, "grad_norm": 0.4792564537130554, "learning_rate": 8.243439502517432e-06, "loss": 0.5352, "step": 646 }, { "epoch": 0.31278704375151073, "grad_norm": 0.5188490831185355, "learning_rate": 8.237351585457499e-06, "loss": 0.5298, "step": 647 }, { "epoch": 0.3132704858593183, "grad_norm": 0.5232755336111542, "learning_rate": 8.231255393465993e-06, "loss": 0.5387, "step": 648 }, { "epoch": 0.3137539279671259, "grad_norm": 0.48933101067554713, "learning_rate": 8.225150942125278e-06, "loss": 0.5156, "step": 649 }, { "epoch": 0.3142373700749335, "grad_norm": 0.47579138598403903, "learning_rate": 8.21903824703882e-06, "loss": 0.552, "step": 650 }, { "epoch": 0.3147208121827411, "grad_norm": 0.47742859766681844, "learning_rate": 8.21291732383116e-06, "loss": 0.5498, "step": 651 }, { "epoch": 0.3152042542905487, "grad_norm": 0.5282098462854927, "learning_rate": 8.206788188147874e-06, "loss": 0.5327, "step": 652 }, { "epoch": 0.3156876963983563, "grad_norm": 0.4655983753785802, "learning_rate": 8.200650855655525e-06, "loss": 0.5523, "step": 653 }, { "epoch": 0.3161711385061639, "grad_norm": 0.46598993965098007, "learning_rate": 8.19450534204163e-06, "loss": 0.5428, "step": 654 }, { "epoch": 0.3166545806139715, "grad_norm": 0.48320040727215685, "learning_rate": 8.188351663014615e-06, "loss": 0.5511, "step": 655 }, { "epoch": 0.31713802272177904, "grad_norm": 0.4851268795547935, "learning_rate": 8.182189834303783e-06, "loss": 0.5515, "step": 656 }, { "epoch": 0.31762146482958664, "grad_norm": 0.4829311813743368, "learning_rate": 8.176019871659263e-06, "loss": 0.5425, "step": 657 }, { "epoch": 0.31810490693739424, "grad_norm": 0.4268110510337058, "learning_rate": 8.169841790851976e-06, "loss": 0.5192, "step": 658 }, { "epoch": 0.31858834904520184, "grad_norm": 0.46970357309915234, "learning_rate": 8.163655607673594e-06, "loss": 0.5516, "step": 659 }, { "epoch": 0.31907179115300943, "grad_norm": 0.4688205789040297, "learning_rate": 8.157461337936506e-06, "loss": 0.5398, "step": 660 }, { "epoch": 0.31955523326081703, "grad_norm": 0.49966496418563966, "learning_rate": 8.151258997473757e-06, "loss": 0.5501, "step": 661 }, { "epoch": 0.3200386753686246, "grad_norm": 0.4715831395525512, "learning_rate": 8.145048602139031e-06, "loss": 0.5473, "step": 662 }, { "epoch": 0.3205221174764322, "grad_norm": 0.46025918766438206, "learning_rate": 8.138830167806601e-06, "loss": 0.5481, "step": 663 }, { "epoch": 0.32100555958423976, "grad_norm": 0.49250849769551697, "learning_rate": 8.132603710371287e-06, "loss": 0.5563, "step": 664 }, { "epoch": 0.32148900169204736, "grad_norm": 0.46277397720994495, "learning_rate": 8.126369245748413e-06, "loss": 0.5418, "step": 665 }, { "epoch": 0.32197244379985496, "grad_norm": 0.44842320811529324, "learning_rate": 8.120126789873775e-06, "loss": 0.549, "step": 666 }, { "epoch": 0.32245588590766255, "grad_norm": 0.4487718178782243, "learning_rate": 8.113876358703593e-06, "loss": 0.5515, "step": 667 }, { "epoch": 0.32293932801547015, "grad_norm": 0.49737040438900676, "learning_rate": 8.10761796821447e-06, "loss": 0.5529, "step": 668 }, { "epoch": 0.32342277012327775, "grad_norm": 0.5088088437400782, "learning_rate": 8.10135163440336e-06, "loss": 0.5507, "step": 669 }, { "epoch": 0.32390621223108534, "grad_norm": 0.5221100660415426, "learning_rate": 8.095077373287517e-06, "loss": 0.5363, "step": 670 }, { "epoch": 0.32438965433889294, "grad_norm": 0.5098038198929602, "learning_rate": 8.088795200904457e-06, "loss": 0.5443, "step": 671 }, { "epoch": 0.3248730964467005, "grad_norm": 0.5299548080054053, "learning_rate": 8.08250513331192e-06, "loss": 0.5547, "step": 672 }, { "epoch": 0.3253565385545081, "grad_norm": 0.47991648628747413, "learning_rate": 8.076207186587826e-06, "loss": 0.552, "step": 673 }, { "epoch": 0.3258399806623157, "grad_norm": 0.4928995313967277, "learning_rate": 8.069901376830232e-06, "loss": 0.5449, "step": 674 }, { "epoch": 0.3263234227701233, "grad_norm": 0.526245201002504, "learning_rate": 8.063587720157298e-06, "loss": 0.5544, "step": 675 }, { "epoch": 0.32680686487793087, "grad_norm": 0.5169185895561939, "learning_rate": 8.057266232707239e-06, "loss": 0.5388, "step": 676 }, { "epoch": 0.32729030698573847, "grad_norm": 0.45862190884382065, "learning_rate": 8.050936930638285e-06, "loss": 0.5523, "step": 677 }, { "epoch": 0.32777374909354606, "grad_norm": 0.4791194354627634, "learning_rate": 8.044599830128643e-06, "loss": 0.5498, "step": 678 }, { "epoch": 0.32825719120135366, "grad_norm": 0.5040011739287719, "learning_rate": 8.038254947376454e-06, "loss": 0.5378, "step": 679 }, { "epoch": 0.3287406333091612, "grad_norm": 0.42346684737245893, "learning_rate": 8.03190229859975e-06, "loss": 0.5541, "step": 680 }, { "epoch": 0.3292240754169688, "grad_norm": 0.48225697444636256, "learning_rate": 8.02554190003641e-06, "loss": 0.5505, "step": 681 }, { "epoch": 0.3297075175247764, "grad_norm": 0.480200233217211, "learning_rate": 8.019173767944128e-06, "loss": 0.5563, "step": 682 }, { "epoch": 0.330190959632584, "grad_norm": 0.4406037883552503, "learning_rate": 8.012797918600363e-06, "loss": 0.5241, "step": 683 }, { "epoch": 0.3306744017403916, "grad_norm": 0.4838913486529156, "learning_rate": 8.006414368302297e-06, "loss": 0.5251, "step": 684 }, { "epoch": 0.3311578438481992, "grad_norm": 0.45454190895682295, "learning_rate": 8.000023133366804e-06, "loss": 0.5449, "step": 685 }, { "epoch": 0.3316412859560068, "grad_norm": 0.49869890620532237, "learning_rate": 7.99362423013039e-06, "loss": 0.5401, "step": 686 }, { "epoch": 0.3321247280638144, "grad_norm": 0.4727231220514769, "learning_rate": 7.98721767494917e-06, "loss": 0.5381, "step": 687 }, { "epoch": 0.332608170171622, "grad_norm": 0.46944667758244535, "learning_rate": 7.980803484198817e-06, "loss": 0.5542, "step": 688 }, { "epoch": 0.3330916122794295, "grad_norm": 0.4643616722232514, "learning_rate": 7.974381674274517e-06, "loss": 0.5394, "step": 689 }, { "epoch": 0.3335750543872371, "grad_norm": 0.4529493856728362, "learning_rate": 7.967952261590936e-06, "loss": 0.5478, "step": 690 }, { "epoch": 0.3340584964950447, "grad_norm": 0.4497900124215144, "learning_rate": 7.961515262582168e-06, "loss": 0.5387, "step": 691 }, { "epoch": 0.3345419386028523, "grad_norm": 0.4613195703294155, "learning_rate": 7.955070693701704e-06, "loss": 0.5488, "step": 692 }, { "epoch": 0.3350253807106599, "grad_norm": 0.45208853687907335, "learning_rate": 7.94861857142238e-06, "loss": 0.5161, "step": 693 }, { "epoch": 0.3355088228184675, "grad_norm": 0.45338462953665065, "learning_rate": 7.942158912236339e-06, "loss": 0.5504, "step": 694 }, { "epoch": 0.3359922649262751, "grad_norm": 0.45784135957705213, "learning_rate": 7.935691732654995e-06, "loss": 0.5525, "step": 695 }, { "epoch": 0.3364757070340827, "grad_norm": 0.4745455134248678, "learning_rate": 7.929217049208977e-06, "loss": 0.5549, "step": 696 }, { "epoch": 0.33695914914189024, "grad_norm": 0.46788843343497605, "learning_rate": 7.922734878448099e-06, "loss": 0.5543, "step": 697 }, { "epoch": 0.33744259124969783, "grad_norm": 0.4894111106267614, "learning_rate": 7.916245236941311e-06, "loss": 0.5456, "step": 698 }, { "epoch": 0.33792603335750543, "grad_norm": 0.4818527781927651, "learning_rate": 7.90974814127666e-06, "loss": 0.5436, "step": 699 }, { "epoch": 0.338409475465313, "grad_norm": 0.48230512049955104, "learning_rate": 7.903243608061246e-06, "loss": 0.5569, "step": 700 }, { "epoch": 0.3388929175731206, "grad_norm": 0.4651013778967097, "learning_rate": 7.89673165392118e-06, "loss": 0.5497, "step": 701 }, { "epoch": 0.3393763596809282, "grad_norm": 0.5263037891579944, "learning_rate": 7.890212295501542e-06, "loss": 0.5489, "step": 702 }, { "epoch": 0.3398598017887358, "grad_norm": 0.47525750483933155, "learning_rate": 7.883685549466337e-06, "loss": 0.5438, "step": 703 }, { "epoch": 0.3403432438965434, "grad_norm": 0.48435256135519467, "learning_rate": 7.877151432498456e-06, "loss": 0.5506, "step": 704 }, { "epoch": 0.34082668600435095, "grad_norm": 0.49040296450298604, "learning_rate": 7.870609961299627e-06, "loss": 0.536, "step": 705 }, { "epoch": 0.34131012811215855, "grad_norm": 0.4437135993163076, "learning_rate": 7.864061152590376e-06, "loss": 0.5539, "step": 706 }, { "epoch": 0.34179357021996615, "grad_norm": 0.48585487486606105, "learning_rate": 7.857505023109989e-06, "loss": 0.5461, "step": 707 }, { "epoch": 0.34227701232777374, "grad_norm": 0.48202275018795376, "learning_rate": 7.850941589616458e-06, "loss": 0.5371, "step": 708 }, { "epoch": 0.34276045443558134, "grad_norm": 0.4716406712767161, "learning_rate": 7.844370868886452e-06, "loss": 0.5557, "step": 709 }, { "epoch": 0.34324389654338894, "grad_norm": 0.49083867550017374, "learning_rate": 7.83779287771526e-06, "loss": 0.5459, "step": 710 }, { "epoch": 0.34372733865119653, "grad_norm": 0.49576681886311147, "learning_rate": 7.831207632916757e-06, "loss": 0.5466, "step": 711 }, { "epoch": 0.34421078075900413, "grad_norm": 0.4533195955521626, "learning_rate": 7.824615151323363e-06, "loss": 0.519, "step": 712 }, { "epoch": 0.3446942228668117, "grad_norm": 0.4638295505748454, "learning_rate": 7.818015449785987e-06, "loss": 0.5485, "step": 713 }, { "epoch": 0.34517766497461927, "grad_norm": 0.4802273717901249, "learning_rate": 7.811408545174001e-06, "loss": 0.5453, "step": 714 }, { "epoch": 0.34566110708242687, "grad_norm": 0.4633815927205105, "learning_rate": 7.804794454375189e-06, "loss": 0.5504, "step": 715 }, { "epoch": 0.34614454919023446, "grad_norm": 0.4455507327132057, "learning_rate": 7.798173194295693e-06, "loss": 0.5425, "step": 716 }, { "epoch": 0.34662799129804206, "grad_norm": 0.4555461305882042, "learning_rate": 7.791544781859993e-06, "loss": 0.5402, "step": 717 }, { "epoch": 0.34711143340584966, "grad_norm": 0.4459519768062681, "learning_rate": 7.784909234010843e-06, "loss": 0.5448, "step": 718 }, { "epoch": 0.34759487551365725, "grad_norm": 0.43036102684437805, "learning_rate": 7.778266567709239e-06, "loss": 0.5532, "step": 719 }, { "epoch": 0.34807831762146485, "grad_norm": 0.4640780423848208, "learning_rate": 7.771616799934372e-06, "loss": 0.5403, "step": 720 }, { "epoch": 0.34856175972927245, "grad_norm": 0.4785048230046999, "learning_rate": 7.764959947683581e-06, "loss": 0.5484, "step": 721 }, { "epoch": 0.34904520183708, "grad_norm": 0.5054523460781126, "learning_rate": 7.758296027972324e-06, "loss": 0.5367, "step": 722 }, { "epoch": 0.3495286439448876, "grad_norm": 0.46292028395178175, "learning_rate": 7.751625057834107e-06, "loss": 0.5221, "step": 723 }, { "epoch": 0.3500120860526952, "grad_norm": 0.5080355944834025, "learning_rate": 7.744947054320475e-06, "loss": 0.552, "step": 724 }, { "epoch": 0.3504955281605028, "grad_norm": 0.4692605361826857, "learning_rate": 7.73826203450094e-06, "loss": 0.5516, "step": 725 }, { "epoch": 0.3509789702683104, "grad_norm": 0.4452014990295793, "learning_rate": 7.731570015462953e-06, "loss": 0.5385, "step": 726 }, { "epoch": 0.35146241237611797, "grad_norm": 0.4407238797603078, "learning_rate": 7.724871014311853e-06, "loss": 0.5512, "step": 727 }, { "epoch": 0.35194585448392557, "grad_norm": 0.4578450461185458, "learning_rate": 7.718165048170827e-06, "loss": 0.5436, "step": 728 }, { "epoch": 0.35242929659173317, "grad_norm": 0.47134089664050416, "learning_rate": 7.711452134180865e-06, "loss": 0.5439, "step": 729 }, { "epoch": 0.3529127386995407, "grad_norm": 0.45807802586279717, "learning_rate": 7.704732289500717e-06, "loss": 0.535, "step": 730 }, { "epoch": 0.3533961808073483, "grad_norm": 0.46923078006226726, "learning_rate": 7.698005531306844e-06, "loss": 0.5438, "step": 731 }, { "epoch": 0.3538796229151559, "grad_norm": 0.47163216368627525, "learning_rate": 7.691271876793387e-06, "loss": 0.5412, "step": 732 }, { "epoch": 0.3543630650229635, "grad_norm": 0.43982060964801745, "learning_rate": 7.684531343172108e-06, "loss": 0.5326, "step": 733 }, { "epoch": 0.3548465071307711, "grad_norm": 0.49898422329655673, "learning_rate": 7.677783947672352e-06, "loss": 0.5352, "step": 734 }, { "epoch": 0.3553299492385787, "grad_norm": 0.4426035951569431, "learning_rate": 7.67102970754101e-06, "loss": 0.5083, "step": 735 }, { "epoch": 0.3558133913463863, "grad_norm": 0.473881609856312, "learning_rate": 7.664268640042459e-06, "loss": 0.5493, "step": 736 }, { "epoch": 0.3562968334541939, "grad_norm": 0.48477702288906854, "learning_rate": 7.657500762458536e-06, "loss": 0.5415, "step": 737 }, { "epoch": 0.3567802755620014, "grad_norm": 0.4673684560489235, "learning_rate": 7.65072609208848e-06, "loss": 0.5402, "step": 738 }, { "epoch": 0.357263717669809, "grad_norm": 0.45922228645390506, "learning_rate": 7.643944646248898e-06, "loss": 0.5523, "step": 739 }, { "epoch": 0.3577471597776166, "grad_norm": 0.5023203702238386, "learning_rate": 7.637156442273705e-06, "loss": 0.5472, "step": 740 }, { "epoch": 0.3582306018854242, "grad_norm": 0.5158066743775931, "learning_rate": 7.630361497514104e-06, "loss": 0.5409, "step": 741 }, { "epoch": 0.3587140439932318, "grad_norm": 0.4279389055361383, "learning_rate": 7.6235598293385184e-06, "loss": 0.548, "step": 742 }, { "epoch": 0.3591974861010394, "grad_norm": 0.48124896416843527, "learning_rate": 7.616751455132561e-06, "loss": 0.5061, "step": 743 }, { "epoch": 0.359680928208847, "grad_norm": 0.45130820986839676, "learning_rate": 7.6099363922989845e-06, "loss": 0.5408, "step": 744 }, { "epoch": 0.3601643703166546, "grad_norm": 0.43968032097493187, "learning_rate": 7.60311465825764e-06, "loss": 0.5419, "step": 745 }, { "epoch": 0.36064781242446214, "grad_norm": 0.4638052394642039, "learning_rate": 7.596286270445429e-06, "loss": 0.5474, "step": 746 }, { "epoch": 0.36113125453226974, "grad_norm": 0.47215533812036253, "learning_rate": 7.5894512463162595e-06, "loss": 0.5481, "step": 747 }, { "epoch": 0.36161469664007734, "grad_norm": 0.4910077404120728, "learning_rate": 7.5826096033410056e-06, "loss": 0.5483, "step": 748 }, { "epoch": 0.36209813874788493, "grad_norm": 0.49435519282302404, "learning_rate": 7.575761359007459e-06, "loss": 0.5375, "step": 749 }, { "epoch": 0.36258158085569253, "grad_norm": 0.4645080520487796, "learning_rate": 7.568906530820281e-06, "loss": 0.5406, "step": 750 }, { "epoch": 0.36306502296350013, "grad_norm": 0.48056836362105476, "learning_rate": 7.562045136300969e-06, "loss": 0.547, "step": 751 }, { "epoch": 0.3635484650713077, "grad_norm": 0.4648855493783244, "learning_rate": 7.555177192987797e-06, "loss": 0.5372, "step": 752 }, { "epoch": 0.3640319071791153, "grad_norm": 0.43851001194612105, "learning_rate": 7.5483027184357825e-06, "loss": 0.5484, "step": 753 }, { "epoch": 0.3645153492869229, "grad_norm": 0.4234049796935857, "learning_rate": 7.541421730216638e-06, "loss": 0.4914, "step": 754 }, { "epoch": 0.36499879139473046, "grad_norm": 0.4886945785128111, "learning_rate": 7.534534245918723e-06, "loss": 0.5362, "step": 755 }, { "epoch": 0.36548223350253806, "grad_norm": 0.47490196043064764, "learning_rate": 7.527640283147003e-06, "loss": 0.5387, "step": 756 }, { "epoch": 0.36596567561034565, "grad_norm": 0.4422931000418374, "learning_rate": 7.520739859523001e-06, "loss": 0.5334, "step": 757 }, { "epoch": 0.36644911771815325, "grad_norm": 0.45103949345201827, "learning_rate": 7.513832992684758e-06, "loss": 0.5423, "step": 758 }, { "epoch": 0.36693255982596085, "grad_norm": 0.5023542886793314, "learning_rate": 7.50691970028678e-06, "loss": 0.5371, "step": 759 }, { "epoch": 0.36741600193376844, "grad_norm": 0.48961990962706975, "learning_rate": 7.500000000000001e-06, "loss": 0.5602, "step": 760 }, { "epoch": 0.36789944404157604, "grad_norm": 0.47244479352550756, "learning_rate": 7.493073909511732e-06, "loss": 0.5399, "step": 761 }, { "epoch": 0.36838288614938364, "grad_norm": 0.47262373426445514, "learning_rate": 7.486141446525619e-06, "loss": 0.5465, "step": 762 }, { "epoch": 0.3688663282571912, "grad_norm": 0.47837508137309714, "learning_rate": 7.479202628761597e-06, "loss": 0.5412, "step": 763 }, { "epoch": 0.3693497703649988, "grad_norm": 0.47771254343171743, "learning_rate": 7.472257473955841e-06, "loss": 0.5429, "step": 764 }, { "epoch": 0.36983321247280637, "grad_norm": 0.45892920422210776, "learning_rate": 7.465305999860728e-06, "loss": 0.5358, "step": 765 }, { "epoch": 0.37031665458061397, "grad_norm": 0.4636269912834914, "learning_rate": 7.4583482242447856e-06, "loss": 0.528, "step": 766 }, { "epoch": 0.37080009668842157, "grad_norm": 0.45196732778688614, "learning_rate": 7.45138416489265e-06, "loss": 0.5466, "step": 767 }, { "epoch": 0.37128353879622916, "grad_norm": 0.44240214579051484, "learning_rate": 7.444413839605017e-06, "loss": 0.5315, "step": 768 }, { "epoch": 0.37176698090403676, "grad_norm": 0.45295775865600874, "learning_rate": 7.437437266198602e-06, "loss": 0.5443, "step": 769 }, { "epoch": 0.37225042301184436, "grad_norm": 0.44966146652009026, "learning_rate": 7.430454462506085e-06, "loss": 0.5417, "step": 770 }, { "epoch": 0.3727338651196519, "grad_norm": 0.451144144721217, "learning_rate": 7.423465446376079e-06, "loss": 0.5389, "step": 771 }, { "epoch": 0.3732173072274595, "grad_norm": 0.45620688667035586, "learning_rate": 7.416470235673069e-06, "loss": 0.538, "step": 772 }, { "epoch": 0.3737007493352671, "grad_norm": 0.44604596469243557, "learning_rate": 7.40946884827738e-06, "loss": 0.5293, "step": 773 }, { "epoch": 0.3741841914430747, "grad_norm": 0.4476638425696451, "learning_rate": 7.402461302085121e-06, "loss": 0.5402, "step": 774 }, { "epoch": 0.3746676335508823, "grad_norm": 0.46401364076227924, "learning_rate": 7.395447615008147e-06, "loss": 0.5377, "step": 775 }, { "epoch": 0.3751510756586899, "grad_norm": 0.46250351228418424, "learning_rate": 7.388427804974003e-06, "loss": 0.5455, "step": 776 }, { "epoch": 0.3756345177664975, "grad_norm": 0.448294668881327, "learning_rate": 7.381401889925894e-06, "loss": 0.5311, "step": 777 }, { "epoch": 0.3761179598743051, "grad_norm": 0.4490599419042619, "learning_rate": 7.374369887822623e-06, "loss": 0.5416, "step": 778 }, { "epoch": 0.3766014019821126, "grad_norm": 0.44904272890455516, "learning_rate": 7.367331816638554e-06, "loss": 0.5464, "step": 779 }, { "epoch": 0.3770848440899202, "grad_norm": 0.4731428544902919, "learning_rate": 7.360287694363566e-06, "loss": 0.5415, "step": 780 }, { "epoch": 0.3775682861977278, "grad_norm": 0.4760085542862297, "learning_rate": 7.353237539002999e-06, "loss": 0.5388, "step": 781 }, { "epoch": 0.3780517283055354, "grad_norm": 0.4817431722421546, "learning_rate": 7.346181368577624e-06, "loss": 0.5513, "step": 782 }, { "epoch": 0.378535170413343, "grad_norm": 0.462287277146555, "learning_rate": 7.3391192011235764e-06, "loss": 0.5393, "step": 783 }, { "epoch": 0.3790186125211506, "grad_norm": 0.44812435609118556, "learning_rate": 7.3320510546923285e-06, "loss": 0.5509, "step": 784 }, { "epoch": 0.3795020546289582, "grad_norm": 0.48025260306275075, "learning_rate": 7.324976947350631e-06, "loss": 0.5387, "step": 785 }, { "epoch": 0.3799854967367658, "grad_norm": 0.4639545538957294, "learning_rate": 7.317896897180472e-06, "loss": 0.5298, "step": 786 }, { "epoch": 0.3804689388445734, "grad_norm": 0.46088677266135386, "learning_rate": 7.31081092227903e-06, "loss": 0.5371, "step": 787 }, { "epoch": 0.38095238095238093, "grad_norm": 0.4446814202000039, "learning_rate": 7.303719040758631e-06, "loss": 0.5368, "step": 788 }, { "epoch": 0.38143582306018853, "grad_norm": 0.4628164716114684, "learning_rate": 7.296621270746691e-06, "loss": 0.5439, "step": 789 }, { "epoch": 0.3819192651679961, "grad_norm": 0.46916898249294825, "learning_rate": 7.289517630385687e-06, "loss": 0.5188, "step": 790 }, { "epoch": 0.3824027072758037, "grad_norm": 0.44187034218765375, "learning_rate": 7.282408137833093e-06, "loss": 0.5404, "step": 791 }, { "epoch": 0.3828861493836113, "grad_norm": 0.47877844839355055, "learning_rate": 7.275292811261346e-06, "loss": 0.5377, "step": 792 }, { "epoch": 0.3833695914914189, "grad_norm": 0.46034254680546527, "learning_rate": 7.268171668857794e-06, "loss": 0.5489, "step": 793 }, { "epoch": 0.3838530335992265, "grad_norm": 0.4557903207271804, "learning_rate": 7.261044728824652e-06, "loss": 0.5415, "step": 794 }, { "epoch": 0.3843364757070341, "grad_norm": 0.477573699663043, "learning_rate": 7.253912009378953e-06, "loss": 0.5526, "step": 795 }, { "epoch": 0.38481991781484165, "grad_norm": 0.5200587123977845, "learning_rate": 7.246773528752501e-06, "loss": 0.5452, "step": 796 }, { "epoch": 0.38530335992264925, "grad_norm": 0.4504279257869106, "learning_rate": 7.239629305191828e-06, "loss": 0.528, "step": 797 }, { "epoch": 0.38578680203045684, "grad_norm": 0.47332344951633437, "learning_rate": 7.2324793569581474e-06, "loss": 0.5413, "step": 798 }, { "epoch": 0.38627024413826444, "grad_norm": 0.4658360771399747, "learning_rate": 7.2253237023273e-06, "loss": 0.5111, "step": 799 }, { "epoch": 0.38675368624607204, "grad_norm": 0.4820288472202763, "learning_rate": 7.21816235958972e-06, "loss": 0.5472, "step": 800 }, { "epoch": 0.38723712835387963, "grad_norm": 0.4713168204495061, "learning_rate": 7.210995347050372e-06, "loss": 0.5441, "step": 801 }, { "epoch": 0.38772057046168723, "grad_norm": 0.45204543208032005, "learning_rate": 7.203822683028721e-06, "loss": 0.5403, "step": 802 }, { "epoch": 0.3882040125694948, "grad_norm": 0.4577372851640406, "learning_rate": 7.196644385858673e-06, "loss": 0.5303, "step": 803 }, { "epoch": 0.38868745467730237, "grad_norm": 0.4458565390228467, "learning_rate": 7.189460473888535e-06, "loss": 0.5453, "step": 804 }, { "epoch": 0.38917089678510997, "grad_norm": 0.4735785239686837, "learning_rate": 7.182270965480963e-06, "loss": 0.5491, "step": 805 }, { "epoch": 0.38965433889291756, "grad_norm": 0.44995709460533084, "learning_rate": 7.17507587901292e-06, "loss": 0.5328, "step": 806 }, { "epoch": 0.39013778100072516, "grad_norm": 0.45714294309440695, "learning_rate": 7.167875232875632e-06, "loss": 0.5401, "step": 807 }, { "epoch": 0.39062122310853276, "grad_norm": 0.45625350898261685, "learning_rate": 7.160669045474524e-06, "loss": 0.5198, "step": 808 }, { "epoch": 0.39110466521634035, "grad_norm": 0.45109568818047574, "learning_rate": 7.153457335229196e-06, "loss": 0.5396, "step": 809 }, { "epoch": 0.39158810732414795, "grad_norm": 0.4362283889114229, "learning_rate": 7.146240120573358e-06, "loss": 0.5421, "step": 810 }, { "epoch": 0.39207154943195555, "grad_norm": 0.44454460051068084, "learning_rate": 7.1390174199547945e-06, "loss": 0.5495, "step": 811 }, { "epoch": 0.3925549915397631, "grad_norm": 0.47028424443890243, "learning_rate": 7.131789251835309e-06, "loss": 0.5528, "step": 812 }, { "epoch": 0.3930384336475707, "grad_norm": 0.44600311061331627, "learning_rate": 7.124555634690684e-06, "loss": 0.546, "step": 813 }, { "epoch": 0.3935218757553783, "grad_norm": 0.42127806705345067, "learning_rate": 7.117316587010625e-06, "loss": 0.5164, "step": 814 }, { "epoch": 0.3940053178631859, "grad_norm": 0.4919804442725305, "learning_rate": 7.110072127298722e-06, "loss": 0.5405, "step": 815 }, { "epoch": 0.3944887599709935, "grad_norm": 0.433673881020081, "learning_rate": 7.1028222740724e-06, "loss": 0.5474, "step": 816 }, { "epoch": 0.39497220207880107, "grad_norm": 0.44154650103792475, "learning_rate": 7.095567045862867e-06, "loss": 0.537, "step": 817 }, { "epoch": 0.39545564418660867, "grad_norm": 0.4862239172154185, "learning_rate": 7.0883064612150684e-06, "loss": 0.5418, "step": 818 }, { "epoch": 0.39593908629441626, "grad_norm": 0.44482243313717656, "learning_rate": 7.081040538687649e-06, "loss": 0.5421, "step": 819 }, { "epoch": 0.3964225284022238, "grad_norm": 0.4674091065145529, "learning_rate": 7.073769296852888e-06, "loss": 0.5322, "step": 820 }, { "epoch": 0.3969059705100314, "grad_norm": 0.42008927529357837, "learning_rate": 7.066492754296668e-06, "loss": 0.5021, "step": 821 }, { "epoch": 0.397389412617839, "grad_norm": 0.4604995654095531, "learning_rate": 7.059210929618416e-06, "loss": 0.5449, "step": 822 }, { "epoch": 0.3978728547256466, "grad_norm": 0.45094698267399413, "learning_rate": 7.051923841431063e-06, "loss": 0.5376, "step": 823 }, { "epoch": 0.3983562968334542, "grad_norm": 0.46562198473083916, "learning_rate": 7.044631508360996e-06, "loss": 0.5449, "step": 824 }, { "epoch": 0.3988397389412618, "grad_norm": 0.45370053407350325, "learning_rate": 7.037333949048005e-06, "loss": 0.5443, "step": 825 }, { "epoch": 0.3993231810490694, "grad_norm": 0.45490375066885613, "learning_rate": 7.03003118214524e-06, "loss": 0.4994, "step": 826 }, { "epoch": 0.399806623156877, "grad_norm": 0.4689025885486327, "learning_rate": 7.022723226319159e-06, "loss": 0.5249, "step": 827 }, { "epoch": 0.4002900652646846, "grad_norm": 0.4587224558807211, "learning_rate": 7.0154101002494914e-06, "loss": 0.531, "step": 828 }, { "epoch": 0.4007735073724921, "grad_norm": 0.43840918685603564, "learning_rate": 7.008091822629172e-06, "loss": 0.5331, "step": 829 }, { "epoch": 0.4012569494802997, "grad_norm": 0.5046588696515812, "learning_rate": 7.00076841216431e-06, "loss": 0.5302, "step": 830 }, { "epoch": 0.4017403915881073, "grad_norm": 0.485313377425035, "learning_rate": 6.993439887574133e-06, "loss": 0.5418, "step": 831 }, { "epoch": 0.4022238336959149, "grad_norm": 0.47268076488419086, "learning_rate": 6.986106267590942e-06, "loss": 0.535, "step": 832 }, { "epoch": 0.4027072758037225, "grad_norm": 0.4528644527398112, "learning_rate": 6.978767570960057e-06, "loss": 0.5362, "step": 833 }, { "epoch": 0.4031907179115301, "grad_norm": 0.458241227473586, "learning_rate": 6.971423816439782e-06, "loss": 0.5346, "step": 834 }, { "epoch": 0.4036741600193377, "grad_norm": 0.476721951726036, "learning_rate": 6.964075022801341e-06, "loss": 0.541, "step": 835 }, { "epoch": 0.4041576021271453, "grad_norm": 0.4634216130492689, "learning_rate": 6.956721208828847e-06, "loss": 0.5441, "step": 836 }, { "epoch": 0.40464104423495284, "grad_norm": 0.46202808494163927, "learning_rate": 6.949362393319239e-06, "loss": 0.5416, "step": 837 }, { "epoch": 0.40512448634276044, "grad_norm": 0.44981029617918733, "learning_rate": 6.941998595082243e-06, "loss": 0.5438, "step": 838 }, { "epoch": 0.40560792845056803, "grad_norm": 0.44835769630421096, "learning_rate": 6.934629832940322e-06, "loss": 0.5322, "step": 839 }, { "epoch": 0.40609137055837563, "grad_norm": 0.4879526087044361, "learning_rate": 6.927256125728624e-06, "loss": 0.544, "step": 840 }, { "epoch": 0.4065748126661832, "grad_norm": 0.47319742567660167, "learning_rate": 6.91987749229494e-06, "loss": 0.5401, "step": 841 }, { "epoch": 0.4070582547739908, "grad_norm": 0.4492955794688954, "learning_rate": 6.91249395149965e-06, "loss": 0.5393, "step": 842 }, { "epoch": 0.4075416968817984, "grad_norm": 0.4436463500280526, "learning_rate": 6.905105522215684e-06, "loss": 0.5384, "step": 843 }, { "epoch": 0.408025138989606, "grad_norm": 0.43897712052796284, "learning_rate": 6.897712223328457e-06, "loss": 0.5297, "step": 844 }, { "epoch": 0.40850858109741356, "grad_norm": 0.5141765277378779, "learning_rate": 6.89031407373584e-06, "loss": 0.5386, "step": 845 }, { "epoch": 0.40899202320522116, "grad_norm": 0.48662407879743685, "learning_rate": 6.8829110923481e-06, "loss": 0.5429, "step": 846 }, { "epoch": 0.40947546531302875, "grad_norm": 0.4671606204232888, "learning_rate": 6.875503298087853e-06, "loss": 0.5339, "step": 847 }, { "epoch": 0.40995890742083635, "grad_norm": 0.47636170298906977, "learning_rate": 6.868090709890016e-06, "loss": 0.5392, "step": 848 }, { "epoch": 0.41044234952864395, "grad_norm": 0.4249603079226182, "learning_rate": 6.8606733467017675e-06, "loss": 0.5046, "step": 849 }, { "epoch": 0.41092579163645154, "grad_norm": 0.4669023850804772, "learning_rate": 6.85325122748248e-06, "loss": 0.5331, "step": 850 }, { "epoch": 0.41140923374425914, "grad_norm": 0.43795275267260886, "learning_rate": 6.845824371203691e-06, "loss": 0.5409, "step": 851 }, { "epoch": 0.41189267585206674, "grad_norm": 0.44363617551756607, "learning_rate": 6.838392796849042e-06, "loss": 0.5371, "step": 852 }, { "epoch": 0.4123761179598743, "grad_norm": 0.4451816120752011, "learning_rate": 6.830956523414239e-06, "loss": 0.5304, "step": 853 }, { "epoch": 0.4128595600676819, "grad_norm": 0.4684744852800341, "learning_rate": 6.8235155699069944e-06, "loss": 0.5316, "step": 854 }, { "epoch": 0.41334300217548947, "grad_norm": 0.44092290998190986, "learning_rate": 6.816069955346986e-06, "loss": 0.5127, "step": 855 }, { "epoch": 0.41382644428329707, "grad_norm": 0.49751628964469147, "learning_rate": 6.808619698765804e-06, "loss": 0.5459, "step": 856 }, { "epoch": 0.41430988639110466, "grad_norm": 0.45953817226939175, "learning_rate": 6.8011648192069045e-06, "loss": 0.5316, "step": 857 }, { "epoch": 0.41479332849891226, "grad_norm": 0.470694123027967, "learning_rate": 6.7937053357255585e-06, "loss": 0.5341, "step": 858 }, { "epoch": 0.41527677060671986, "grad_norm": 0.482654546386677, "learning_rate": 6.786241267388812e-06, "loss": 0.5392, "step": 859 }, { "epoch": 0.41576021271452746, "grad_norm": 0.49418422954918506, "learning_rate": 6.778772633275421e-06, "loss": 0.5259, "step": 860 }, { "epoch": 0.41624365482233505, "grad_norm": 0.4508128718503279, "learning_rate": 6.771299452475818e-06, "loss": 0.5439, "step": 861 }, { "epoch": 0.4167270969301426, "grad_norm": 0.483501145929134, "learning_rate": 6.763821744092054e-06, "loss": 0.521, "step": 862 }, { "epoch": 0.4172105390379502, "grad_norm": 0.45484324344169746, "learning_rate": 6.756339527237756e-06, "loss": 0.5282, "step": 863 }, { "epoch": 0.4176939811457578, "grad_norm": 0.47800071067084154, "learning_rate": 6.748852821038075e-06, "loss": 0.5362, "step": 864 }, { "epoch": 0.4181774232535654, "grad_norm": 0.4561384481336352, "learning_rate": 6.741361644629629e-06, "loss": 0.5452, "step": 865 }, { "epoch": 0.418660865361373, "grad_norm": 0.4974806458018085, "learning_rate": 6.733866017160475e-06, "loss": 0.5374, "step": 866 }, { "epoch": 0.4191443074691806, "grad_norm": 0.4696802681175673, "learning_rate": 6.7263659577900375e-06, "loss": 0.5368, "step": 867 }, { "epoch": 0.4196277495769882, "grad_norm": 0.46421987800289705, "learning_rate": 6.718861485689077e-06, "loss": 0.5361, "step": 868 }, { "epoch": 0.42011119168479577, "grad_norm": 0.42817223127930704, "learning_rate": 6.711352620039623e-06, "loss": 0.5132, "step": 869 }, { "epoch": 0.4205946337926033, "grad_norm": 0.4191221628297101, "learning_rate": 6.703839380034945e-06, "loss": 0.5282, "step": 870 }, { "epoch": 0.4210780759004109, "grad_norm": 0.4151086659722137, "learning_rate": 6.6963217848794895e-06, "loss": 0.5046, "step": 871 }, { "epoch": 0.4215615180082185, "grad_norm": 0.4638906293888461, "learning_rate": 6.6887998537888354e-06, "loss": 0.5365, "step": 872 }, { "epoch": 0.4220449601160261, "grad_norm": 0.46061668652958593, "learning_rate": 6.681273605989643e-06, "loss": 0.5315, "step": 873 }, { "epoch": 0.4225284022238337, "grad_norm": 0.5190170389663172, "learning_rate": 6.673743060719613e-06, "loss": 0.5328, "step": 874 }, { "epoch": 0.4230118443316413, "grad_norm": 0.45460321890251315, "learning_rate": 6.666208237227421e-06, "loss": 0.5359, "step": 875 }, { "epoch": 0.4234952864394489, "grad_norm": 0.4292973087733905, "learning_rate": 6.6586691547726855e-06, "loss": 0.5139, "step": 876 }, { "epoch": 0.4239787285472565, "grad_norm": 0.47082103025419264, "learning_rate": 6.651125832625908e-06, "loss": 0.5455, "step": 877 }, { "epoch": 0.42446217065506403, "grad_norm": 0.5113179606142492, "learning_rate": 6.6435782900684284e-06, "loss": 0.5528, "step": 878 }, { "epoch": 0.4249456127628716, "grad_norm": 0.48001824284611705, "learning_rate": 6.636026546392374e-06, "loss": 0.5391, "step": 879 }, { "epoch": 0.4254290548706792, "grad_norm": 0.4190847750909328, "learning_rate": 6.628470620900611e-06, "loss": 0.5309, "step": 880 }, { "epoch": 0.4259124969784868, "grad_norm": 0.4429086362697282, "learning_rate": 6.620910532906692e-06, "loss": 0.5194, "step": 881 }, { "epoch": 0.4263959390862944, "grad_norm": 0.4425962079317176, "learning_rate": 6.613346301734813e-06, "loss": 0.5132, "step": 882 }, { "epoch": 0.426879381194102, "grad_norm": 0.4614396271141222, "learning_rate": 6.605777946719757e-06, "loss": 0.5358, "step": 883 }, { "epoch": 0.4273628233019096, "grad_norm": 7.793396607210369, "learning_rate": 6.59820548720685e-06, "loss": 0.74, "step": 884 }, { "epoch": 0.4278462654097172, "grad_norm": 0.47916638143499957, "learning_rate": 6.590628942551909e-06, "loss": 0.5401, "step": 885 }, { "epoch": 0.42832970751752475, "grad_norm": 0.5010520173856451, "learning_rate": 6.583048332121193e-06, "loss": 0.5384, "step": 886 }, { "epoch": 0.42881314962533235, "grad_norm": 0.43637840250511184, "learning_rate": 6.5754636752913535e-06, "loss": 0.5018, "step": 887 }, { "epoch": 0.42929659173313994, "grad_norm": 0.4347273985958766, "learning_rate": 6.567874991449383e-06, "loss": 0.5303, "step": 888 }, { "epoch": 0.42978003384094754, "grad_norm": 0.470696307591686, "learning_rate": 6.560282299992571e-06, "loss": 0.5454, "step": 889 }, { "epoch": 0.43026347594875514, "grad_norm": 0.43949603254219816, "learning_rate": 6.552685620328447e-06, "loss": 0.5115, "step": 890 }, { "epoch": 0.43074691805656273, "grad_norm": 0.5002179505262337, "learning_rate": 6.545084971874738e-06, "loss": 0.5386, "step": 891 }, { "epoch": 0.43123036016437033, "grad_norm": 0.48898211514461637, "learning_rate": 6.537480374059313e-06, "loss": 0.5464, "step": 892 }, { "epoch": 0.4317138022721779, "grad_norm": 0.47111181264654595, "learning_rate": 6.529871846320138e-06, "loss": 0.5225, "step": 893 }, { "epoch": 0.4321972443799855, "grad_norm": 0.47111234243294525, "learning_rate": 6.522259408105223e-06, "loss": 0.5363, "step": 894 }, { "epoch": 0.43268068648779306, "grad_norm": 0.4867092311804253, "learning_rate": 6.514643078872571e-06, "loss": 0.533, "step": 895 }, { "epoch": 0.43316412859560066, "grad_norm": 0.4583905211154658, "learning_rate": 6.507022878090137e-06, "loss": 0.5428, "step": 896 }, { "epoch": 0.43364757070340826, "grad_norm": 0.4950332559469317, "learning_rate": 6.499398825235767e-06, "loss": 0.5337, "step": 897 }, { "epoch": 0.43413101281121586, "grad_norm": 0.46856791974797646, "learning_rate": 6.491770939797152e-06, "loss": 0.5323, "step": 898 }, { "epoch": 0.43461445491902345, "grad_norm": 0.4646364739679311, "learning_rate": 6.4841392412717864e-06, "loss": 0.5407, "step": 899 }, { "epoch": 0.43509789702683105, "grad_norm": 0.43558921933796657, "learning_rate": 6.476503749166903e-06, "loss": 0.5347, "step": 900 }, { "epoch": 0.43558133913463865, "grad_norm": 0.4961569425990492, "learning_rate": 6.4688644829994385e-06, "loss": 0.5295, "step": 901 }, { "epoch": 0.43606478124244624, "grad_norm": 0.4698493360586227, "learning_rate": 6.4612214622959705e-06, "loss": 0.5457, "step": 902 }, { "epoch": 0.4365482233502538, "grad_norm": 0.435939008717009, "learning_rate": 6.453574706592676e-06, "loss": 0.521, "step": 903 }, { "epoch": 0.4370316654580614, "grad_norm": 0.5347621942726917, "learning_rate": 6.44592423543528e-06, "loss": 0.5354, "step": 904 }, { "epoch": 0.437515107565869, "grad_norm": 0.49607229965381217, "learning_rate": 6.4382700683790025e-06, "loss": 0.544, "step": 905 }, { "epoch": 0.4379985496736766, "grad_norm": 0.4645864217211478, "learning_rate": 6.4306122249885105e-06, "loss": 0.5192, "step": 906 }, { "epoch": 0.43848199178148417, "grad_norm": 0.46522668277788565, "learning_rate": 6.422950724837872e-06, "loss": 0.526, "step": 907 }, { "epoch": 0.43896543388929177, "grad_norm": 0.46658657241667495, "learning_rate": 6.415285587510495e-06, "loss": 0.5088, "step": 908 }, { "epoch": 0.43944887599709936, "grad_norm": 0.49352648435992785, "learning_rate": 6.407616832599091e-06, "loss": 0.5291, "step": 909 }, { "epoch": 0.43993231810490696, "grad_norm": 0.5022475233173346, "learning_rate": 6.399944479705615e-06, "loss": 0.5349, "step": 910 }, { "epoch": 0.4404157602127145, "grad_norm": 0.4645078975290342, "learning_rate": 6.392268548441218e-06, "loss": 0.5356, "step": 911 }, { "epoch": 0.4408992023205221, "grad_norm": 0.4656283146754065, "learning_rate": 6.384589058426201e-06, "loss": 0.5297, "step": 912 }, { "epoch": 0.4413826444283297, "grad_norm": 0.4778447743673107, "learning_rate": 6.3769060292899585e-06, "loss": 0.531, "step": 913 }, { "epoch": 0.4418660865361373, "grad_norm": 0.5067362493290994, "learning_rate": 6.3692194806709326e-06, "loss": 0.5266, "step": 914 }, { "epoch": 0.4423495286439449, "grad_norm": 0.4583050351500669, "learning_rate": 6.36152943221656e-06, "loss": 0.5068, "step": 915 }, { "epoch": 0.4428329707517525, "grad_norm": 0.44674303828563183, "learning_rate": 6.353835903583225e-06, "loss": 0.5135, "step": 916 }, { "epoch": 0.4433164128595601, "grad_norm": 0.4845057402223313, "learning_rate": 6.346138914436207e-06, "loss": 0.53, "step": 917 }, { "epoch": 0.4437998549673677, "grad_norm": 0.46927160898270703, "learning_rate": 6.338438484449632e-06, "loss": 0.5282, "step": 918 }, { "epoch": 0.4442832970751752, "grad_norm": 0.46844884470128584, "learning_rate": 6.330734633306415e-06, "loss": 0.5205, "step": 919 }, { "epoch": 0.4447667391829828, "grad_norm": 0.46793830654029384, "learning_rate": 6.3230273806982254e-06, "loss": 0.5354, "step": 920 }, { "epoch": 0.4452501812907904, "grad_norm": 0.46904655646766624, "learning_rate": 6.31531674632542e-06, "loss": 0.5403, "step": 921 }, { "epoch": 0.445733623398598, "grad_norm": 0.4797829974325548, "learning_rate": 6.307602749897001e-06, "loss": 0.5285, "step": 922 }, { "epoch": 0.4462170655064056, "grad_norm": 0.5126549879385887, "learning_rate": 6.299885411130566e-06, "loss": 0.5293, "step": 923 }, { "epoch": 0.4467005076142132, "grad_norm": 0.47873452298166375, "learning_rate": 6.292164749752256e-06, "loss": 0.5358, "step": 924 }, { "epoch": 0.4471839497220208, "grad_norm": 0.4697820734594197, "learning_rate": 6.284440785496701e-06, "loss": 0.5303, "step": 925 }, { "epoch": 0.4476673918298284, "grad_norm": 0.48049048675766015, "learning_rate": 6.27671353810698e-06, "loss": 0.5384, "step": 926 }, { "epoch": 0.448150833937636, "grad_norm": 0.44829305517954404, "learning_rate": 6.268983027334557e-06, "loss": 0.5349, "step": 927 }, { "epoch": 0.44863427604544354, "grad_norm": 0.47046261605559686, "learning_rate": 6.2612492729392396e-06, "loss": 0.5445, "step": 928 }, { "epoch": 0.44911771815325113, "grad_norm": 0.43943250727373434, "learning_rate": 6.25351229468913e-06, "loss": 0.5243, "step": 929 }, { "epoch": 0.44960116026105873, "grad_norm": 0.457243800658822, "learning_rate": 6.245772112360568e-06, "loss": 0.5335, "step": 930 }, { "epoch": 0.4500846023688663, "grad_norm": 0.4558342802943274, "learning_rate": 6.2380287457380814e-06, "loss": 0.5295, "step": 931 }, { "epoch": 0.4505680444766739, "grad_norm": 0.46490304474994104, "learning_rate": 6.230282214614342e-06, "loss": 0.5277, "step": 932 }, { "epoch": 0.4510514865844815, "grad_norm": 0.46808167357734604, "learning_rate": 6.222532538790107e-06, "loss": 0.5038, "step": 933 }, { "epoch": 0.4515349286922891, "grad_norm": 0.44746189473984266, "learning_rate": 6.214779738074169e-06, "loss": 0.5482, "step": 934 }, { "epoch": 0.4520183708000967, "grad_norm": 0.4749348946440431, "learning_rate": 6.2070238322833165e-06, "loss": 0.5408, "step": 935 }, { "epoch": 0.45250181290790426, "grad_norm": 0.4105832023778957, "learning_rate": 6.199264841242267e-06, "loss": 0.507, "step": 936 }, { "epoch": 0.45298525501571185, "grad_norm": 0.45569820188618954, "learning_rate": 6.191502784783627e-06, "loss": 0.5361, "step": 937 }, { "epoch": 0.45346869712351945, "grad_norm": 0.4487391558411953, "learning_rate": 6.183737682747839e-06, "loss": 0.5404, "step": 938 }, { "epoch": 0.45395213923132705, "grad_norm": 0.38508572249224443, "learning_rate": 6.17596955498313e-06, "loss": 0.475, "step": 939 }, { "epoch": 0.45443558133913464, "grad_norm": 0.47287816273000344, "learning_rate": 6.16819842134546e-06, "loss": 0.5293, "step": 940 }, { "epoch": 0.45491902344694224, "grad_norm": 0.4650031849319042, "learning_rate": 6.160424301698472e-06, "loss": 0.5315, "step": 941 }, { "epoch": 0.45540246555474984, "grad_norm": 0.4613271364601288, "learning_rate": 6.1526472159134454e-06, "loss": 0.5398, "step": 942 }, { "epoch": 0.45588590766255743, "grad_norm": 0.42626048007656836, "learning_rate": 6.1448671838692365e-06, "loss": 0.5246, "step": 943 }, { "epoch": 0.456369349770365, "grad_norm": 0.45709658015590027, "learning_rate": 6.1370842254522325e-06, "loss": 0.5392, "step": 944 }, { "epoch": 0.45685279187817257, "grad_norm": 0.4969316742066114, "learning_rate": 6.129298360556304e-06, "loss": 0.5216, "step": 945 }, { "epoch": 0.45733623398598017, "grad_norm": 0.48874038272824616, "learning_rate": 6.1215096090827485e-06, "loss": 0.5341, "step": 946 }, { "epoch": 0.45781967609378776, "grad_norm": 0.43416511670164404, "learning_rate": 6.1137179909402445e-06, "loss": 0.5357, "step": 947 }, { "epoch": 0.45830311820159536, "grad_norm": 0.4398289245515299, "learning_rate": 6.105923526044794e-06, "loss": 0.5312, "step": 948 }, { "epoch": 0.45878656030940296, "grad_norm": 0.506729531362168, "learning_rate": 6.098126234319679e-06, "loss": 0.5164, "step": 949 }, { "epoch": 0.45927000241721055, "grad_norm": 0.49661020376021475, "learning_rate": 6.0903261356954035e-06, "loss": 0.5406, "step": 950 }, { "epoch": 0.45975344452501815, "grad_norm": 0.49213602324131056, "learning_rate": 6.08252325010965e-06, "loss": 0.5369, "step": 951 }, { "epoch": 0.4602368866328257, "grad_norm": 0.4522403269153531, "learning_rate": 6.074717597507223e-06, "loss": 0.5318, "step": 952 }, { "epoch": 0.4607203287406333, "grad_norm": 0.46334490270574613, "learning_rate": 6.066909197839996e-06, "loss": 0.5053, "step": 953 }, { "epoch": 0.4612037708484409, "grad_norm": 0.4555176211572043, "learning_rate": 6.059098071066874e-06, "loss": 0.5313, "step": 954 }, { "epoch": 0.4616872129562485, "grad_norm": 0.4697386359927019, "learning_rate": 6.051284237153723e-06, "loss": 0.5304, "step": 955 }, { "epoch": 0.4621706550640561, "grad_norm": 0.47606054578122203, "learning_rate": 6.043467716073333e-06, "loss": 0.5392, "step": 956 }, { "epoch": 0.4626540971718637, "grad_norm": 0.4504863880612236, "learning_rate": 6.035648527805359e-06, "loss": 0.5333, "step": 957 }, { "epoch": 0.4631375392796713, "grad_norm": 0.47075391826579777, "learning_rate": 6.0278266923362805e-06, "loss": 0.5331, "step": 958 }, { "epoch": 0.46362098138747887, "grad_norm": 0.4403281566352786, "learning_rate": 6.0200022296593375e-06, "loss": 0.5432, "step": 959 }, { "epoch": 0.46410442349528647, "grad_norm": 0.4572023392519702, "learning_rate": 6.012175159774488e-06, "loss": 0.5323, "step": 960 }, { "epoch": 0.464587865603094, "grad_norm": 0.44773102967818523, "learning_rate": 6.004345502688353e-06, "loss": 0.5299, "step": 961 }, { "epoch": 0.4650713077109016, "grad_norm": 0.4371461582728146, "learning_rate": 5.996513278414166e-06, "loss": 0.5385, "step": 962 }, { "epoch": 0.4655547498187092, "grad_norm": 0.42182149099377253, "learning_rate": 5.988678506971726e-06, "loss": 0.5303, "step": 963 }, { "epoch": 0.4660381919265168, "grad_norm": 0.4594942911793403, "learning_rate": 5.980841208387338e-06, "loss": 0.503, "step": 964 }, { "epoch": 0.4665216340343244, "grad_norm": 0.4501324492723119, "learning_rate": 5.973001402693769e-06, "loss": 0.5253, "step": 965 }, { "epoch": 0.467005076142132, "grad_norm": 0.4373460975849127, "learning_rate": 5.965159109930196e-06, "loss": 0.5386, "step": 966 }, { "epoch": 0.4674885182499396, "grad_norm": 0.4418857621109752, "learning_rate": 5.957314350142149e-06, "loss": 0.529, "step": 967 }, { "epoch": 0.4679719603577472, "grad_norm": 0.485651415733174, "learning_rate": 5.94946714338147e-06, "loss": 0.538, "step": 968 }, { "epoch": 0.4684554024655547, "grad_norm": 0.44827050412474007, "learning_rate": 5.941617509706247e-06, "loss": 0.5333, "step": 969 }, { "epoch": 0.4689388445733623, "grad_norm": 0.44674045160443804, "learning_rate": 5.933765469180779e-06, "loss": 0.5329, "step": 970 }, { "epoch": 0.4694222866811699, "grad_norm": 0.47047904115025807, "learning_rate": 5.925911041875514e-06, "loss": 0.5304, "step": 971 }, { "epoch": 0.4699057287889775, "grad_norm": 0.47618192657266745, "learning_rate": 5.9180542478670025e-06, "loss": 0.5339, "step": 972 }, { "epoch": 0.4703891708967851, "grad_norm": 0.4372302817189476, "learning_rate": 5.910195107237842e-06, "loss": 0.5311, "step": 973 }, { "epoch": 0.4708726130045927, "grad_norm": 0.4599705922186888, "learning_rate": 5.902333640076627e-06, "loss": 0.5294, "step": 974 }, { "epoch": 0.4713560551124003, "grad_norm": 0.4668491954105507, "learning_rate": 5.894469866477905e-06, "loss": 0.5319, "step": 975 }, { "epoch": 0.4718394972202079, "grad_norm": 0.4298713287991765, "learning_rate": 5.886603806542114e-06, "loss": 0.5308, "step": 976 }, { "epoch": 0.47232293932801545, "grad_norm": 0.44120727064086296, "learning_rate": 5.878735480375537e-06, "loss": 0.5271, "step": 977 }, { "epoch": 0.47280638143582304, "grad_norm": 0.45517085979612587, "learning_rate": 5.87086490809025e-06, "loss": 0.5363, "step": 978 }, { "epoch": 0.47328982354363064, "grad_norm": 0.41718667843710106, "learning_rate": 5.862992109804071e-06, "loss": 0.5208, "step": 979 }, { "epoch": 0.47377326565143824, "grad_norm": 0.4698208514148652, "learning_rate": 5.855117105640503e-06, "loss": 0.5045, "step": 980 }, { "epoch": 0.47425670775924583, "grad_norm": 0.44330430106357593, "learning_rate": 5.847239915728695e-06, "loss": 0.5213, "step": 981 }, { "epoch": 0.47474014986705343, "grad_norm": 0.46348151704515983, "learning_rate": 5.839360560203379e-06, "loss": 0.5416, "step": 982 }, { "epoch": 0.475223591974861, "grad_norm": 0.4369134554030048, "learning_rate": 5.831479059204822e-06, "loss": 0.5397, "step": 983 }, { "epoch": 0.4757070340826686, "grad_norm": 0.434332595795343, "learning_rate": 5.823595432878775e-06, "loss": 0.5253, "step": 984 }, { "epoch": 0.47619047619047616, "grad_norm": 0.45066877191979876, "learning_rate": 5.815709701376424e-06, "loss": 0.501, "step": 985 }, { "epoch": 0.47667391829828376, "grad_norm": 0.43680722385313575, "learning_rate": 5.8078218848543326e-06, "loss": 0.5425, "step": 986 }, { "epoch": 0.47715736040609136, "grad_norm": 0.462367177083934, "learning_rate": 5.799932003474398e-06, "loss": 0.5293, "step": 987 }, { "epoch": 0.47764080251389895, "grad_norm": 0.46046005650848004, "learning_rate": 5.7920400774037884e-06, "loss": 0.5273, "step": 988 }, { "epoch": 0.47812424462170655, "grad_norm": 0.4325701614014005, "learning_rate": 5.784146126814909e-06, "loss": 0.523, "step": 989 }, { "epoch": 0.47860768672951415, "grad_norm": 0.42688145845821907, "learning_rate": 5.776250171885329e-06, "loss": 0.5289, "step": 990 }, { "epoch": 0.47909112883732174, "grad_norm": 0.4553909518433745, "learning_rate": 5.768352232797748e-06, "loss": 0.5354, "step": 991 }, { "epoch": 0.47957457094512934, "grad_norm": 0.4612709888611227, "learning_rate": 5.760452329739933e-06, "loss": 0.5346, "step": 992 }, { "epoch": 0.48005801305293694, "grad_norm": 0.4671180697912862, "learning_rate": 5.752550482904674e-06, "loss": 0.5381, "step": 993 }, { "epoch": 0.4805414551607445, "grad_norm": 0.44306247126150655, "learning_rate": 5.744646712489729e-06, "loss": 0.5347, "step": 994 }, { "epoch": 0.4810248972685521, "grad_norm": 0.47845590337101257, "learning_rate": 5.736741038697771e-06, "loss": 0.546, "step": 995 }, { "epoch": 0.4815083393763597, "grad_norm": 0.42271596693767594, "learning_rate": 5.728833481736339e-06, "loss": 0.5189, "step": 996 }, { "epoch": 0.48199178148416727, "grad_norm": 0.4538365308944498, "learning_rate": 5.720924061817786e-06, "loss": 0.5405, "step": 997 }, { "epoch": 0.48247522359197487, "grad_norm": 0.4492861025701022, "learning_rate": 5.71301279915923e-06, "loss": 0.5317, "step": 998 }, { "epoch": 0.48295866569978246, "grad_norm": 0.4745326397538611, "learning_rate": 5.705099713982491e-06, "loss": 0.532, "step": 999 }, { "epoch": 0.48344210780759006, "grad_norm": 0.44214626511814337, "learning_rate": 5.697184826514058e-06, "loss": 0.5305, "step": 1000 }, { "epoch": 0.48392554991539766, "grad_norm": 0.46953984488621225, "learning_rate": 5.689268156985015e-06, "loss": 0.5385, "step": 1001 }, { "epoch": 0.4844089920232052, "grad_norm": 0.421787366978404, "learning_rate": 5.6813497256310124e-06, "loss": 0.5468, "step": 1002 }, { "epoch": 0.4848924341310128, "grad_norm": 0.44994209423666665, "learning_rate": 5.673429552692196e-06, "loss": 0.5259, "step": 1003 }, { "epoch": 0.4853758762388204, "grad_norm": 0.4777649344533823, "learning_rate": 5.66550765841317e-06, "loss": 0.5312, "step": 1004 }, { "epoch": 0.485859318346628, "grad_norm": 0.4450135880363142, "learning_rate": 5.6575840630429295e-06, "loss": 0.5234, "step": 1005 }, { "epoch": 0.4863427604544356, "grad_norm": 0.48478431421225965, "learning_rate": 5.649658786834825e-06, "loss": 0.5337, "step": 1006 }, { "epoch": 0.4868262025622432, "grad_norm": 0.4600376201705413, "learning_rate": 5.641731850046503e-06, "loss": 0.5292, "step": 1007 }, { "epoch": 0.4873096446700508, "grad_norm": 0.46358685580492737, "learning_rate": 5.633803272939851e-06, "loss": 0.5033, "step": 1008 }, { "epoch": 0.4877930867778584, "grad_norm": 0.4424655348417247, "learning_rate": 5.62587307578095e-06, "loss": 0.5199, "step": 1009 }, { "epoch": 0.4882765288856659, "grad_norm": 0.44958190643210255, "learning_rate": 5.6179412788400255e-06, "loss": 0.5285, "step": 1010 }, { "epoch": 0.4887599709934735, "grad_norm": 0.46154354908053036, "learning_rate": 5.610007902391387e-06, "loss": 0.5302, "step": 1011 }, { "epoch": 0.4892434131012811, "grad_norm": 0.4787830389554928, "learning_rate": 5.602072966713389e-06, "loss": 0.5319, "step": 1012 }, { "epoch": 0.4897268552090887, "grad_norm": 0.46247126201641375, "learning_rate": 5.594136492088363e-06, "loss": 0.533, "step": 1013 }, { "epoch": 0.4902102973168963, "grad_norm": 0.4497969101747785, "learning_rate": 5.586198498802577e-06, "loss": 0.5207, "step": 1014 }, { "epoch": 0.4906937394247039, "grad_norm": 0.4711750674532031, "learning_rate": 5.578259007146183e-06, "loss": 0.5182, "step": 1015 }, { "epoch": 0.4911771815325115, "grad_norm": 0.44280948751548693, "learning_rate": 5.570318037413162e-06, "loss": 0.5335, "step": 1016 }, { "epoch": 0.4916606236403191, "grad_norm": 0.4140593593091692, "learning_rate": 5.562375609901273e-06, "loss": 0.498, "step": 1017 }, { "epoch": 0.49214406574812664, "grad_norm": 0.5035482968527856, "learning_rate": 5.5544317449119975e-06, "loss": 0.535, "step": 1018 }, { "epoch": 0.49262750785593423, "grad_norm": 0.470430802340302, "learning_rate": 5.546486462750499e-06, "loss": 0.529, "step": 1019 }, { "epoch": 0.49311094996374183, "grad_norm": 0.4614608636774156, "learning_rate": 5.538539783725556e-06, "loss": 0.5415, "step": 1020 }, { "epoch": 0.4935943920715494, "grad_norm": 0.43439106514800707, "learning_rate": 5.530591728149522e-06, "loss": 0.5237, "step": 1021 }, { "epoch": 0.494077834179357, "grad_norm": 0.4532581332185661, "learning_rate": 5.522642316338268e-06, "loss": 0.5275, "step": 1022 }, { "epoch": 0.4945612762871646, "grad_norm": 0.44400011265861933, "learning_rate": 5.51469156861113e-06, "loss": 0.5279, "step": 1023 }, { "epoch": 0.4950447183949722, "grad_norm": 0.464713276215896, "learning_rate": 5.50673950529086e-06, "loss": 0.5261, "step": 1024 }, { "epoch": 0.4955281605027798, "grad_norm": 0.46801515065857147, "learning_rate": 5.498786146703575e-06, "loss": 0.5392, "step": 1025 }, { "epoch": 0.4960116026105874, "grad_norm": 0.43826397020984875, "learning_rate": 5.490831513178698e-06, "loss": 0.513, "step": 1026 }, { "epoch": 0.49649504471839495, "grad_norm": 0.4545609138302334, "learning_rate": 5.482875625048916e-06, "loss": 0.5342, "step": 1027 }, { "epoch": 0.49697848682620255, "grad_norm": 0.45528220157554766, "learning_rate": 5.474918502650116e-06, "loss": 0.5371, "step": 1028 }, { "epoch": 0.49746192893401014, "grad_norm": 0.4918245291291218, "learning_rate": 5.466960166321348e-06, "loss": 0.5248, "step": 1029 }, { "epoch": 0.49794537104181774, "grad_norm": 0.43603238272665606, "learning_rate": 5.459000636404759e-06, "loss": 0.5236, "step": 1030 }, { "epoch": 0.49842881314962534, "grad_norm": 0.43802907063700564, "learning_rate": 5.451039933245551e-06, "loss": 0.5342, "step": 1031 }, { "epoch": 0.49891225525743294, "grad_norm": 0.4660487433831574, "learning_rate": 5.44307807719192e-06, "loss": 0.5249, "step": 1032 }, { "epoch": 0.49939569736524053, "grad_norm": 0.4349479566561577, "learning_rate": 5.435115088595016e-06, "loss": 0.4997, "step": 1033 }, { "epoch": 0.49987913947304813, "grad_norm": 0.43592525172206065, "learning_rate": 5.4271509878088755e-06, "loss": 0.5263, "step": 1034 }, { "epoch": 0.5003625815808557, "grad_norm": 0.42380458268711035, "learning_rate": 5.4191857951903825e-06, "loss": 0.503, "step": 1035 }, { "epoch": 0.5008460236886633, "grad_norm": 0.47347721204466, "learning_rate": 5.4112195310992144e-06, "loss": 0.5228, "step": 1036 }, { "epoch": 0.5013294657964709, "grad_norm": 0.4523581952116975, "learning_rate": 5.403252215897781e-06, "loss": 0.5295, "step": 1037 }, { "epoch": 0.5018129079042785, "grad_norm": 0.44718772307460525, "learning_rate": 5.395283869951184e-06, "loss": 0.5402, "step": 1038 }, { "epoch": 0.5022963500120861, "grad_norm": 0.42574725470561453, "learning_rate": 5.387314513627156e-06, "loss": 0.5228, "step": 1039 }, { "epoch": 0.5027797921198937, "grad_norm": 0.4816073830916942, "learning_rate": 5.379344167296017e-06, "loss": 0.5302, "step": 1040 }, { "epoch": 0.5032632342277013, "grad_norm": 0.454535172796951, "learning_rate": 5.371372851330612e-06, "loss": 0.5337, "step": 1041 }, { "epoch": 0.5037466763355088, "grad_norm": 0.44304402033518747, "learning_rate": 5.3634005861062675e-06, "loss": 0.5348, "step": 1042 }, { "epoch": 0.5042301184433164, "grad_norm": 0.4647077624164304, "learning_rate": 5.355427392000736e-06, "loss": 0.5367, "step": 1043 }, { "epoch": 0.504713560551124, "grad_norm": 0.47204921033701974, "learning_rate": 5.347453289394146e-06, "loss": 0.5236, "step": 1044 }, { "epoch": 0.5051970026589316, "grad_norm": 0.444269913904137, "learning_rate": 5.339478298668943e-06, "loss": 0.5374, "step": 1045 }, { "epoch": 0.5056804447667392, "grad_norm": 0.437749383717649, "learning_rate": 5.331502440209849e-06, "loss": 0.529, "step": 1046 }, { "epoch": 0.5061638868745467, "grad_norm": 0.44850371734571, "learning_rate": 5.3235257344037996e-06, "loss": 0.5363, "step": 1047 }, { "epoch": 0.5066473289823543, "grad_norm": 0.4687290830806752, "learning_rate": 5.3155482016398995e-06, "loss": 0.5335, "step": 1048 }, { "epoch": 0.5071307710901619, "grad_norm": 0.43863314490483407, "learning_rate": 5.307569862309363e-06, "loss": 0.5269, "step": 1049 }, { "epoch": 0.5076142131979695, "grad_norm": 0.44499358455418236, "learning_rate": 5.29959073680547e-06, "loss": 0.5269, "step": 1050 }, { "epoch": 0.5080976553057771, "grad_norm": 0.4443034689772289, "learning_rate": 5.2916108455235084e-06, "loss": 0.5247, "step": 1051 }, { "epoch": 0.5085810974135847, "grad_norm": 0.41196428985814043, "learning_rate": 5.2836302088607235e-06, "loss": 0.5257, "step": 1052 }, { "epoch": 0.5090645395213923, "grad_norm": 0.4196111969098687, "learning_rate": 5.275648847216263e-06, "loss": 0.5326, "step": 1053 }, { "epoch": 0.5095479816291999, "grad_norm": 0.4396027925039716, "learning_rate": 5.267666780991135e-06, "loss": 0.5384, "step": 1054 }, { "epoch": 0.5100314237370075, "grad_norm": 0.4918850008899013, "learning_rate": 5.259684030588141e-06, "loss": 0.5217, "step": 1055 }, { "epoch": 0.5105148658448151, "grad_norm": 0.4636011028874602, "learning_rate": 5.251700616411836e-06, "loss": 0.5292, "step": 1056 }, { "epoch": 0.5109983079526227, "grad_norm": 0.45080015588791633, "learning_rate": 5.243716558868469e-06, "loss": 0.5335, "step": 1057 }, { "epoch": 0.5114817500604303, "grad_norm": 0.4430606358386943, "learning_rate": 5.235731878365935e-06, "loss": 0.5366, "step": 1058 }, { "epoch": 0.5119651921682379, "grad_norm": 0.4765633831252005, "learning_rate": 5.22774659531372e-06, "loss": 0.5343, "step": 1059 }, { "epoch": 0.5124486342760455, "grad_norm": 0.47759199812886083, "learning_rate": 5.219760730122854e-06, "loss": 0.5318, "step": 1060 }, { "epoch": 0.5129320763838531, "grad_norm": 0.4277450357502153, "learning_rate": 5.211774303205849e-06, "loss": 0.5055, "step": 1061 }, { "epoch": 0.5134155184916607, "grad_norm": 0.42824551582926373, "learning_rate": 5.203787334976655e-06, "loss": 0.5015, "step": 1062 }, { "epoch": 0.5138989605994683, "grad_norm": 0.5041405711353805, "learning_rate": 5.195799845850611e-06, "loss": 0.525, "step": 1063 }, { "epoch": 0.5143824027072758, "grad_norm": 0.4306413852615145, "learning_rate": 5.18781185624438e-06, "loss": 0.5265, "step": 1064 }, { "epoch": 0.5148658448150834, "grad_norm": 0.4361432944536921, "learning_rate": 5.179823386575908e-06, "loss": 0.5311, "step": 1065 }, { "epoch": 0.515349286922891, "grad_norm": 0.4240690760700838, "learning_rate": 5.171834457264364e-06, "loss": 0.5286, "step": 1066 }, { "epoch": 0.5158327290306985, "grad_norm": 0.47256746355593565, "learning_rate": 5.1638450887301006e-06, "loss": 0.5282, "step": 1067 }, { "epoch": 0.5163161711385061, "grad_norm": 0.4445988854998036, "learning_rate": 5.155855301394585e-06, "loss": 0.527, "step": 1068 }, { "epoch": 0.5167996132463137, "grad_norm": 0.46405127876443125, "learning_rate": 5.147865115680357e-06, "loss": 0.5289, "step": 1069 }, { "epoch": 0.5172830553541213, "grad_norm": 0.45801111107179227, "learning_rate": 5.139874552010975e-06, "loss": 0.531, "step": 1070 }, { "epoch": 0.5177664974619289, "grad_norm": 0.454345233725776, "learning_rate": 5.131883630810966e-06, "loss": 0.5428, "step": 1071 }, { "epoch": 0.5182499395697365, "grad_norm": 0.4649418568564353, "learning_rate": 5.123892372505768e-06, "loss": 0.524, "step": 1072 }, { "epoch": 0.5187333816775441, "grad_norm": 0.4609921972037312, "learning_rate": 5.11590079752168e-06, "loss": 0.5337, "step": 1073 }, { "epoch": 0.5192168237853517, "grad_norm": 0.4257268056499296, "learning_rate": 5.107908926285813e-06, "loss": 0.5247, "step": 1074 }, { "epoch": 0.5197002658931593, "grad_norm": 0.4277925159892485, "learning_rate": 5.099916779226032e-06, "loss": 0.5314, "step": 1075 }, { "epoch": 0.5201837080009669, "grad_norm": 0.46374173796570095, "learning_rate": 5.091924376770912e-06, "loss": 0.5267, "step": 1076 }, { "epoch": 0.5206671501087745, "grad_norm": 0.46413399868377414, "learning_rate": 5.083931739349675e-06, "loss": 0.5227, "step": 1077 }, { "epoch": 0.5211505922165821, "grad_norm": 0.42805047816232233, "learning_rate": 5.075938887392149e-06, "loss": 0.5148, "step": 1078 }, { "epoch": 0.5216340343243897, "grad_norm": 0.42348168283938675, "learning_rate": 5.0679458413287055e-06, "loss": 0.5168, "step": 1079 }, { "epoch": 0.5221174764321972, "grad_norm": 0.48349127354280697, "learning_rate": 5.059952621590216e-06, "loss": 0.5274, "step": 1080 }, { "epoch": 0.5226009185400048, "grad_norm": 0.447828940551305, "learning_rate": 5.051959248607993e-06, "loss": 0.5251, "step": 1081 }, { "epoch": 0.5230843606478124, "grad_norm": 0.45995219297674483, "learning_rate": 5.043965742813744e-06, "loss": 0.5246, "step": 1082 }, { "epoch": 0.52356780275562, "grad_norm": 0.4744443087812869, "learning_rate": 5.035972124639511e-06, "loss": 0.5299, "step": 1083 }, { "epoch": 0.5240512448634276, "grad_norm": 0.4140590259366713, "learning_rate": 5.02797841451763e-06, "loss": 0.5273, "step": 1084 }, { "epoch": 0.5245346869712352, "grad_norm": 0.4421687910150297, "learning_rate": 5.019984632880665e-06, "loss": 0.5342, "step": 1085 }, { "epoch": 0.5250181290790428, "grad_norm": 0.4665180180836343, "learning_rate": 5.011990800161369e-06, "loss": 0.5314, "step": 1086 }, { "epoch": 0.5255015711868504, "grad_norm": 0.45569707259235365, "learning_rate": 5.00399693679262e-06, "loss": 0.5291, "step": 1087 }, { "epoch": 0.525985013294658, "grad_norm": 0.3966652463954235, "learning_rate": 4.9960030632073815e-06, "loss": 0.4852, "step": 1088 }, { "epoch": 0.5264684554024656, "grad_norm": 0.4156779611396039, "learning_rate": 4.988009199838632e-06, "loss": 0.5266, "step": 1089 }, { "epoch": 0.5269518975102732, "grad_norm": 0.4459927015276638, "learning_rate": 4.980015367119336e-06, "loss": 0.5128, "step": 1090 }, { "epoch": 0.5274353396180808, "grad_norm": 0.4470940459251613, "learning_rate": 4.9720215854823716e-06, "loss": 0.5215, "step": 1091 }, { "epoch": 0.5279187817258884, "grad_norm": 0.4379040861596386, "learning_rate": 4.96402787536049e-06, "loss": 0.529, "step": 1092 }, { "epoch": 0.528402223833696, "grad_norm": 0.42846117019918506, "learning_rate": 4.956034257186258e-06, "loss": 0.5196, "step": 1093 }, { "epoch": 0.5288856659415035, "grad_norm": 0.45571668646782787, "learning_rate": 4.9480407513920086e-06, "loss": 0.527, "step": 1094 }, { "epoch": 0.5293691080493111, "grad_norm": 0.4680208745726276, "learning_rate": 4.940047378409786e-06, "loss": 0.523, "step": 1095 }, { "epoch": 0.5298525501571187, "grad_norm": 0.44029959009092084, "learning_rate": 4.932054158671295e-06, "loss": 0.5244, "step": 1096 }, { "epoch": 0.5303359922649262, "grad_norm": 0.45471058687459337, "learning_rate": 4.924061112607853e-06, "loss": 0.532, "step": 1097 }, { "epoch": 0.5308194343727338, "grad_norm": 0.43297324457453135, "learning_rate": 4.9160682606503255e-06, "loss": 0.5226, "step": 1098 }, { "epoch": 0.5313028764805414, "grad_norm": 0.4269816974887771, "learning_rate": 4.908075623229089e-06, "loss": 0.5242, "step": 1099 }, { "epoch": 0.531786318588349, "grad_norm": 0.4280455143246443, "learning_rate": 4.900083220773968e-06, "loss": 0.5082, "step": 1100 }, { "epoch": 0.5322697606961566, "grad_norm": 0.4422972364197889, "learning_rate": 4.892091073714189e-06, "loss": 0.5162, "step": 1101 }, { "epoch": 0.5327532028039642, "grad_norm": 0.44177794728378245, "learning_rate": 4.88409920247832e-06, "loss": 0.5287, "step": 1102 }, { "epoch": 0.5332366449117718, "grad_norm": 0.4384466281073893, "learning_rate": 4.876107627494234e-06, "loss": 0.5224, "step": 1103 }, { "epoch": 0.5337200870195794, "grad_norm": 0.42791066381831555, "learning_rate": 4.868116369189033e-06, "loss": 0.519, "step": 1104 }, { "epoch": 0.534203529127387, "grad_norm": 0.43287603203122865, "learning_rate": 4.860125447989026e-06, "loss": 0.5315, "step": 1105 }, { "epoch": 0.5346869712351946, "grad_norm": 0.40835340581900775, "learning_rate": 4.852134884319646e-06, "loss": 0.5013, "step": 1106 }, { "epoch": 0.5351704133430022, "grad_norm": 0.4564877621647975, "learning_rate": 4.844144698605418e-06, "loss": 0.5163, "step": 1107 }, { "epoch": 0.5356538554508098, "grad_norm": 0.4350849446506223, "learning_rate": 4.836154911269902e-06, "loss": 0.5216, "step": 1108 }, { "epoch": 0.5361372975586174, "grad_norm": 0.45777989132001196, "learning_rate": 4.8281655427356375e-06, "loss": 0.5248, "step": 1109 }, { "epoch": 0.536620739666425, "grad_norm": 0.463821530722432, "learning_rate": 4.820176613424095e-06, "loss": 0.5302, "step": 1110 }, { "epoch": 0.5371041817742326, "grad_norm": 0.44517887708797427, "learning_rate": 4.812188143755621e-06, "loss": 0.5214, "step": 1111 }, { "epoch": 0.5375876238820402, "grad_norm": 0.5017058167871686, "learning_rate": 4.80420015414939e-06, "loss": 0.5305, "step": 1112 }, { "epoch": 0.5380710659898477, "grad_norm": 0.4672768776486219, "learning_rate": 4.796212665023345e-06, "loss": 0.5237, "step": 1113 }, { "epoch": 0.5385545080976553, "grad_norm": 0.4419995316595335, "learning_rate": 4.788225696794153e-06, "loss": 0.5277, "step": 1114 }, { "epoch": 0.5390379502054629, "grad_norm": 0.44730511943868706, "learning_rate": 4.780239269877147e-06, "loss": 0.5313, "step": 1115 }, { "epoch": 0.5395213923132705, "grad_norm": 0.45204850110236905, "learning_rate": 4.7722534046862805e-06, "loss": 0.5231, "step": 1116 }, { "epoch": 0.540004834421078, "grad_norm": 0.45779960123303604, "learning_rate": 4.764268121634066e-06, "loss": 0.507, "step": 1117 }, { "epoch": 0.5404882765288856, "grad_norm": 0.41695878051285573, "learning_rate": 4.7562834411315324e-06, "loss": 0.5042, "step": 1118 }, { "epoch": 0.5409717186366932, "grad_norm": 0.4513235919318933, "learning_rate": 4.748299383588167e-06, "loss": 0.5258, "step": 1119 }, { "epoch": 0.5414551607445008, "grad_norm": 0.467590473541893, "learning_rate": 4.74031596941186e-06, "loss": 0.5375, "step": 1120 }, { "epoch": 0.5419386028523084, "grad_norm": 0.4524946558422428, "learning_rate": 4.7323332190088675e-06, "loss": 0.5199, "step": 1121 }, { "epoch": 0.542422044960116, "grad_norm": 0.42113263739650647, "learning_rate": 4.7243511527837374e-06, "loss": 0.5251, "step": 1122 }, { "epoch": 0.5429054870679236, "grad_norm": 0.4636225245244688, "learning_rate": 4.716369791139279e-06, "loss": 0.5308, "step": 1123 }, { "epoch": 0.5433889291757312, "grad_norm": 0.4398566927754983, "learning_rate": 4.708389154476492e-06, "loss": 0.5201, "step": 1124 }, { "epoch": 0.5438723712835388, "grad_norm": 0.43330211663967066, "learning_rate": 4.7004092631945315e-06, "loss": 0.5258, "step": 1125 }, { "epoch": 0.5443558133913464, "grad_norm": 0.44482177202458967, "learning_rate": 4.692430137690638e-06, "loss": 0.5222, "step": 1126 }, { "epoch": 0.544839255499154, "grad_norm": 0.4318024796342877, "learning_rate": 4.684451798360102e-06, "loss": 0.5204, "step": 1127 }, { "epoch": 0.5453226976069616, "grad_norm": 0.45444414518744425, "learning_rate": 4.6764742655962e-06, "loss": 0.5255, "step": 1128 }, { "epoch": 0.5458061397147692, "grad_norm": 0.4372839038666406, "learning_rate": 4.6684975597901526e-06, "loss": 0.5275, "step": 1129 }, { "epoch": 0.5462895818225767, "grad_norm": 0.43767287064021165, "learning_rate": 4.660521701331058e-06, "loss": 0.5046, "step": 1130 }, { "epoch": 0.5467730239303843, "grad_norm": 0.42146624184063447, "learning_rate": 4.652546710605857e-06, "loss": 0.5284, "step": 1131 }, { "epoch": 0.5472564660381919, "grad_norm": 0.4528806888549099, "learning_rate": 4.644572607999267e-06, "loss": 0.5234, "step": 1132 }, { "epoch": 0.5477399081459995, "grad_norm": 0.41615032339392954, "learning_rate": 4.636599413893734e-06, "loss": 0.5149, "step": 1133 }, { "epoch": 0.5482233502538071, "grad_norm": 0.4080780400252472, "learning_rate": 4.628627148669391e-06, "loss": 0.5069, "step": 1134 }, { "epoch": 0.5487067923616147, "grad_norm": 0.4376373218589361, "learning_rate": 4.620655832703984e-06, "loss": 0.5232, "step": 1135 }, { "epoch": 0.5491902344694223, "grad_norm": 0.44817086720050736, "learning_rate": 4.612685486372846e-06, "loss": 0.5284, "step": 1136 }, { "epoch": 0.5496736765772299, "grad_norm": 0.4269717422470433, "learning_rate": 4.604716130048818e-06, "loss": 0.5292, "step": 1137 }, { "epoch": 0.5501571186850375, "grad_norm": 0.4463889311004707, "learning_rate": 4.596747784102221e-06, "loss": 0.5296, "step": 1138 }, { "epoch": 0.5506405607928451, "grad_norm": 0.4544805899306494, "learning_rate": 4.588780468900787e-06, "loss": 0.5342, "step": 1139 }, { "epoch": 0.5511240029006527, "grad_norm": 0.43223974279832084, "learning_rate": 4.580814204809618e-06, "loss": 0.5278, "step": 1140 }, { "epoch": 0.5516074450084603, "grad_norm": 0.4526101073084036, "learning_rate": 4.572849012191126e-06, "loss": 0.5274, "step": 1141 }, { "epoch": 0.5520908871162679, "grad_norm": 0.46135315991278786, "learning_rate": 4.564884911404986e-06, "loss": 0.5308, "step": 1142 }, { "epoch": 0.5525743292240755, "grad_norm": 0.451744932774668, "learning_rate": 4.5569219228080805e-06, "loss": 0.5228, "step": 1143 }, { "epoch": 0.553057771331883, "grad_norm": 0.4431407162306295, "learning_rate": 4.54896006675445e-06, "loss": 0.5001, "step": 1144 }, { "epoch": 0.5535412134396906, "grad_norm": 0.4115173802208087, "learning_rate": 4.540999363595242e-06, "loss": 0.4963, "step": 1145 }, { "epoch": 0.5540246555474981, "grad_norm": 0.418962800898634, "learning_rate": 4.5330398336786526e-06, "loss": 0.5277, "step": 1146 }, { "epoch": 0.5545080976553057, "grad_norm": 0.46372386433812574, "learning_rate": 4.525081497349887e-06, "loss": 0.5427, "step": 1147 }, { "epoch": 0.5549915397631133, "grad_norm": 0.4530031866173555, "learning_rate": 4.517124374951086e-06, "loss": 0.5178, "step": 1148 }, { "epoch": 0.5554749818709209, "grad_norm": 0.440822304311767, "learning_rate": 4.509168486821304e-06, "loss": 0.5225, "step": 1149 }, { "epoch": 0.5559584239787285, "grad_norm": 0.4127961848352273, "learning_rate": 4.501213853296425e-06, "loss": 0.523, "step": 1150 }, { "epoch": 0.5564418660865361, "grad_norm": 0.461288293810827, "learning_rate": 4.493260494709141e-06, "loss": 0.5251, "step": 1151 }, { "epoch": 0.5569253081943437, "grad_norm": 0.43458067348060225, "learning_rate": 4.48530843138887e-06, "loss": 0.5419, "step": 1152 }, { "epoch": 0.5574087503021513, "grad_norm": 0.42472609884541546, "learning_rate": 4.477357683661734e-06, "loss": 0.5318, "step": 1153 }, { "epoch": 0.5578921924099589, "grad_norm": 0.4256107384490566, "learning_rate": 4.469408271850479e-06, "loss": 0.506, "step": 1154 }, { "epoch": 0.5583756345177665, "grad_norm": 0.4098807197744025, "learning_rate": 4.4614602162744455e-06, "loss": 0.5152, "step": 1155 }, { "epoch": 0.5588590766255741, "grad_norm": 0.4390148331821114, "learning_rate": 4.453513537249503e-06, "loss": 0.527, "step": 1156 }, { "epoch": 0.5593425187333817, "grad_norm": 0.4711204869094555, "learning_rate": 4.445568255088003e-06, "loss": 0.5247, "step": 1157 }, { "epoch": 0.5598259608411893, "grad_norm": 0.41653734784347574, "learning_rate": 4.4376243900987296e-06, "loss": 0.5232, "step": 1158 }, { "epoch": 0.5603094029489969, "grad_norm": 0.44411194102788987, "learning_rate": 4.429681962586839e-06, "loss": 0.5365, "step": 1159 }, { "epoch": 0.5607928450568045, "grad_norm": 0.44003715852304276, "learning_rate": 4.421740992853818e-06, "loss": 0.5311, "step": 1160 }, { "epoch": 0.5612762871646121, "grad_norm": 0.43445702749764165, "learning_rate": 4.413801501197424e-06, "loss": 0.5192, "step": 1161 }, { "epoch": 0.5617597292724196, "grad_norm": 0.4315566299087621, "learning_rate": 4.405863507911638e-06, "loss": 0.5007, "step": 1162 }, { "epoch": 0.5622431713802272, "grad_norm": 0.3786991552102392, "learning_rate": 4.3979270332866105e-06, "loss": 0.4867, "step": 1163 }, { "epoch": 0.5627266134880348, "grad_norm": 0.4244074214223604, "learning_rate": 4.389992097608613e-06, "loss": 0.5271, "step": 1164 }, { "epoch": 0.5632100555958424, "grad_norm": 0.430673286759424, "learning_rate": 4.3820587211599745e-06, "loss": 0.5292, "step": 1165 }, { "epoch": 0.56369349770365, "grad_norm": 0.46109079230422667, "learning_rate": 4.374126924219052e-06, "loss": 0.5197, "step": 1166 }, { "epoch": 0.5641769398114576, "grad_norm": 0.4275407630254517, "learning_rate": 4.366196727060152e-06, "loss": 0.5196, "step": 1167 }, { "epoch": 0.5646603819192652, "grad_norm": 0.4407289737300367, "learning_rate": 4.3582681499535e-06, "loss": 0.5206, "step": 1168 }, { "epoch": 0.5651438240270727, "grad_norm": 0.4119248969357382, "learning_rate": 4.3503412131651765e-06, "loss": 0.5341, "step": 1169 }, { "epoch": 0.5656272661348803, "grad_norm": 0.4621359921349015, "learning_rate": 4.342415936957073e-06, "loss": 0.5225, "step": 1170 }, { "epoch": 0.5661107082426879, "grad_norm": 0.45885127710831636, "learning_rate": 4.334492341586833e-06, "loss": 0.5328, "step": 1171 }, { "epoch": 0.5665941503504955, "grad_norm": 0.4217251248929531, "learning_rate": 4.326570447307804e-06, "loss": 0.5024, "step": 1172 }, { "epoch": 0.5670775924583031, "grad_norm": 0.43349297844561585, "learning_rate": 4.318650274368989e-06, "loss": 0.5302, "step": 1173 }, { "epoch": 0.5675610345661107, "grad_norm": 0.44173213520395654, "learning_rate": 4.310731843014985e-06, "loss": 0.518, "step": 1174 }, { "epoch": 0.5680444766739183, "grad_norm": 0.44550331806578247, "learning_rate": 4.302815173485944e-06, "loss": 0.5262, "step": 1175 }, { "epoch": 0.5685279187817259, "grad_norm": 0.41790214183276025, "learning_rate": 4.294900286017509e-06, "loss": 0.5249, "step": 1176 }, { "epoch": 0.5690113608895335, "grad_norm": 0.4406462454965783, "learning_rate": 4.286987200840772e-06, "loss": 0.5399, "step": 1177 }, { "epoch": 0.5694948029973411, "grad_norm": 0.42899916006281247, "learning_rate": 4.279075938182214e-06, "loss": 0.522, "step": 1178 }, { "epoch": 0.5699782451051486, "grad_norm": 0.39425395091405524, "learning_rate": 4.271166518263662e-06, "loss": 0.4916, "step": 1179 }, { "epoch": 0.5704616872129562, "grad_norm": 0.4325306883401716, "learning_rate": 4.263258961302232e-06, "loss": 0.5297, "step": 1180 }, { "epoch": 0.5709451293207638, "grad_norm": 0.42524338171666753, "learning_rate": 4.255353287510272e-06, "loss": 0.524, "step": 1181 }, { "epoch": 0.5714285714285714, "grad_norm": 0.44279478264935357, "learning_rate": 4.247449517095329e-06, "loss": 0.5215, "step": 1182 }, { "epoch": 0.571912013536379, "grad_norm": 0.4197931530174695, "learning_rate": 4.239547670260069e-06, "loss": 0.5099, "step": 1183 }, { "epoch": 0.5723954556441866, "grad_norm": 0.4310563818067479, "learning_rate": 4.231647767202254e-06, "loss": 0.5191, "step": 1184 }, { "epoch": 0.5728788977519942, "grad_norm": 0.42356166524796646, "learning_rate": 4.223749828114672e-06, "loss": 0.528, "step": 1185 }, { "epoch": 0.5733623398598018, "grad_norm": 0.4095445643557658, "learning_rate": 4.215853873185093e-06, "loss": 0.4862, "step": 1186 }, { "epoch": 0.5738457819676094, "grad_norm": 0.47015761976525233, "learning_rate": 4.2079599225962115e-06, "loss": 0.5183, "step": 1187 }, { "epoch": 0.574329224075417, "grad_norm": 0.4175313339147143, "learning_rate": 4.2000679965256045e-06, "loss": 0.5045, "step": 1188 }, { "epoch": 0.5748126661832246, "grad_norm": 0.43952093100735207, "learning_rate": 4.192178115145668e-06, "loss": 0.4942, "step": 1189 }, { "epoch": 0.5752961082910322, "grad_norm": 0.41782905847478197, "learning_rate": 4.184290298623578e-06, "loss": 0.4962, "step": 1190 }, { "epoch": 0.5757795503988398, "grad_norm": 0.4321674737064156, "learning_rate": 4.176404567121225e-06, "loss": 0.5397, "step": 1191 }, { "epoch": 0.5762629925066474, "grad_norm": 0.4802929727845466, "learning_rate": 4.16852094079518e-06, "loss": 0.5249, "step": 1192 }, { "epoch": 0.576746434614455, "grad_norm": 0.4829071925445664, "learning_rate": 4.160639439796624e-06, "loss": 0.5192, "step": 1193 }, { "epoch": 0.5772298767222626, "grad_norm": 0.41462388567526964, "learning_rate": 4.152760084271305e-06, "loss": 0.5224, "step": 1194 }, { "epoch": 0.57771331883007, "grad_norm": 0.41336785715866026, "learning_rate": 4.1448828943595e-06, "loss": 0.531, "step": 1195 }, { "epoch": 0.5781967609378776, "grad_norm": 0.4424735299028098, "learning_rate": 4.1370078901959306e-06, "loss": 0.5309, "step": 1196 }, { "epoch": 0.5786802030456852, "grad_norm": 0.43755728178912967, "learning_rate": 4.129135091909752e-06, "loss": 0.5314, "step": 1197 }, { "epoch": 0.5791636451534928, "grad_norm": 0.4234445834897067, "learning_rate": 4.121264519624463e-06, "loss": 0.4971, "step": 1198 }, { "epoch": 0.5796470872613004, "grad_norm": 0.4465261505400547, "learning_rate": 4.113396193457887e-06, "loss": 0.5421, "step": 1199 }, { "epoch": 0.580130529369108, "grad_norm": 0.43864096822609294, "learning_rate": 4.105530133522096e-06, "loss": 0.5286, "step": 1200 }, { "epoch": 0.5806139714769156, "grad_norm": 0.44812939972631666, "learning_rate": 4.0976663599233745e-06, "loss": 0.5283, "step": 1201 }, { "epoch": 0.5810974135847232, "grad_norm": 0.41015688502886277, "learning_rate": 4.08980489276216e-06, "loss": 0.5031, "step": 1202 }, { "epoch": 0.5815808556925308, "grad_norm": 0.4371592707730246, "learning_rate": 4.081945752133e-06, "loss": 0.5247, "step": 1203 }, { "epoch": 0.5820642978003384, "grad_norm": 0.4545452075633195, "learning_rate": 4.074088958124488e-06, "loss": 0.5233, "step": 1204 }, { "epoch": 0.582547739908146, "grad_norm": 0.4034341944937049, "learning_rate": 4.066234530819222e-06, "loss": 0.4997, "step": 1205 }, { "epoch": 0.5830311820159536, "grad_norm": 0.44158682076629885, "learning_rate": 4.058382490293755e-06, "loss": 0.5327, "step": 1206 }, { "epoch": 0.5835146241237612, "grad_norm": 0.44126859021633347, "learning_rate": 4.050532856618532e-06, "loss": 0.5172, "step": 1207 }, { "epoch": 0.5839980662315688, "grad_norm": 0.44983862546632325, "learning_rate": 4.0426856498578515e-06, "loss": 0.5321, "step": 1208 }, { "epoch": 0.5844815083393764, "grad_norm": 0.4306823960936312, "learning_rate": 4.034840890069805e-06, "loss": 0.5286, "step": 1209 }, { "epoch": 0.584964950447184, "grad_norm": 0.46647403495000134, "learning_rate": 4.0269985973062325e-06, "loss": 0.5205, "step": 1210 }, { "epoch": 0.5854483925549916, "grad_norm": 0.43512362259260234, "learning_rate": 4.019158791612662e-06, "loss": 0.5186, "step": 1211 }, { "epoch": 0.5859318346627991, "grad_norm": 0.42474616162914414, "learning_rate": 4.0113214930282765e-06, "loss": 0.5019, "step": 1212 }, { "epoch": 0.5864152767706067, "grad_norm": 0.4496255266024326, "learning_rate": 4.003486721585834e-06, "loss": 0.5292, "step": 1213 }, { "epoch": 0.5868987188784143, "grad_norm": 0.4692064186214037, "learning_rate": 3.995654497311649e-06, "loss": 0.524, "step": 1214 }, { "epoch": 0.5873821609862219, "grad_norm": 0.47446797627391324, "learning_rate": 3.987824840225512e-06, "loss": 0.5296, "step": 1215 }, { "epoch": 0.5878656030940295, "grad_norm": 0.45351628397562704, "learning_rate": 3.979997770340664e-06, "loss": 0.5191, "step": 1216 }, { "epoch": 0.5883490452018371, "grad_norm": 0.4469658212338775, "learning_rate": 3.972173307663721e-06, "loss": 0.5259, "step": 1217 }, { "epoch": 0.5888324873096447, "grad_norm": 0.42106841746243373, "learning_rate": 3.964351472194642e-06, "loss": 0.5169, "step": 1218 }, { "epoch": 0.5893159294174523, "grad_norm": 0.46081869250650287, "learning_rate": 3.95653228392667e-06, "loss": 0.5324, "step": 1219 }, { "epoch": 0.5897993715252599, "grad_norm": 0.43787433991965447, "learning_rate": 3.9487157628462784e-06, "loss": 0.5253, "step": 1220 }, { "epoch": 0.5902828136330674, "grad_norm": 0.4446710684339124, "learning_rate": 3.940901928933127e-06, "loss": 0.5207, "step": 1221 }, { "epoch": 0.590766255740875, "grad_norm": 0.4878921355151572, "learning_rate": 3.933090802160004e-06, "loss": 0.5216, "step": 1222 }, { "epoch": 0.5912496978486826, "grad_norm": 0.43014793362728476, "learning_rate": 3.925282402492779e-06, "loss": 0.5158, "step": 1223 }, { "epoch": 0.5917331399564902, "grad_norm": 0.4785437280134671, "learning_rate": 3.917476749890351e-06, "loss": 0.5337, "step": 1224 }, { "epoch": 0.5922165820642978, "grad_norm": 0.4374190045344671, "learning_rate": 3.909673864304597e-06, "loss": 0.528, "step": 1225 }, { "epoch": 0.5927000241721054, "grad_norm": 0.4547685840805692, "learning_rate": 3.901873765680322e-06, "loss": 0.5366, "step": 1226 }, { "epoch": 0.593183466279913, "grad_norm": 0.4594395711536057, "learning_rate": 3.894076473955207e-06, "loss": 0.5202, "step": 1227 }, { "epoch": 0.5936669083877205, "grad_norm": 0.5197830445285274, "learning_rate": 3.886282009059757e-06, "loss": 0.5293, "step": 1228 }, { "epoch": 0.5941503504955281, "grad_norm": 0.438870970435199, "learning_rate": 3.878490390917253e-06, "loss": 0.521, "step": 1229 }, { "epoch": 0.5946337926033357, "grad_norm": 0.43955571970066076, "learning_rate": 3.8707016394436985e-06, "loss": 0.5219, "step": 1230 }, { "epoch": 0.5951172347111433, "grad_norm": 0.4319400896121196, "learning_rate": 3.86291577454777e-06, "loss": 0.5274, "step": 1231 }, { "epoch": 0.5956006768189509, "grad_norm": 0.42866945423374303, "learning_rate": 3.855132816130767e-06, "loss": 0.5287, "step": 1232 }, { "epoch": 0.5960841189267585, "grad_norm": 0.4905566924544953, "learning_rate": 3.847352784086556e-06, "loss": 0.5214, "step": 1233 }, { "epoch": 0.5965675610345661, "grad_norm": 0.4815238783873125, "learning_rate": 3.839575698301529e-06, "loss": 0.5348, "step": 1234 }, { "epoch": 0.5970510031423737, "grad_norm": 0.4611545386580029, "learning_rate": 3.831801578654541e-06, "loss": 0.521, "step": 1235 }, { "epoch": 0.5975344452501813, "grad_norm": 0.44138236799554953, "learning_rate": 3.8240304450168716e-06, "loss": 0.5012, "step": 1236 }, { "epoch": 0.5980178873579889, "grad_norm": 0.4729557809805851, "learning_rate": 3.8162623172521615e-06, "loss": 0.5239, "step": 1237 }, { "epoch": 0.5985013294657965, "grad_norm": 0.47751713396917145, "learning_rate": 3.808497215216374e-06, "loss": 0.518, "step": 1238 }, { "epoch": 0.5989847715736041, "grad_norm": 0.4632727328684821, "learning_rate": 3.8007351587577342e-06, "loss": 0.5212, "step": 1239 }, { "epoch": 0.5994682136814117, "grad_norm": 0.43452448731632676, "learning_rate": 3.7929761677166847e-06, "loss": 0.5256, "step": 1240 }, { "epoch": 0.5999516557892193, "grad_norm": 0.41707858133064807, "learning_rate": 3.7852202619258327e-06, "loss": 0.5258, "step": 1241 }, { "epoch": 0.6004350978970269, "grad_norm": 0.42484738876263983, "learning_rate": 3.777467461209895e-06, "loss": 0.5226, "step": 1242 }, { "epoch": 0.6009185400048345, "grad_norm": 0.46278022630826876, "learning_rate": 3.76971778538566e-06, "loss": 0.5265, "step": 1243 }, { "epoch": 0.6014019821126421, "grad_norm": 0.4136178806145892, "learning_rate": 3.76197125426192e-06, "loss": 0.521, "step": 1244 }, { "epoch": 0.6018854242204495, "grad_norm": 0.42100500706131366, "learning_rate": 3.754227887639434e-06, "loss": 0.5119, "step": 1245 }, { "epoch": 0.6023688663282571, "grad_norm": 0.4123940096542578, "learning_rate": 3.7464877053108706e-06, "loss": 0.5258, "step": 1246 }, { "epoch": 0.6028523084360647, "grad_norm": 0.4195138029502561, "learning_rate": 3.7387507270607617e-06, "loss": 0.529, "step": 1247 }, { "epoch": 0.6033357505438723, "grad_norm": 0.45482061749805036, "learning_rate": 3.7310169726654444e-06, "loss": 0.528, "step": 1248 }, { "epoch": 0.6038191926516799, "grad_norm": 0.4371974850319641, "learning_rate": 3.7232864618930217e-06, "loss": 0.5182, "step": 1249 }, { "epoch": 0.6043026347594875, "grad_norm": 0.43099991632770085, "learning_rate": 3.715559214503298e-06, "loss": 0.5133, "step": 1250 }, { "epoch": 0.6047860768672951, "grad_norm": 0.4048318396541149, "learning_rate": 3.707835250247745e-06, "loss": 0.4877, "step": 1251 }, { "epoch": 0.6052695189751027, "grad_norm": 0.43040118158566426, "learning_rate": 3.7001145888694335e-06, "loss": 0.5256, "step": 1252 }, { "epoch": 0.6057529610829103, "grad_norm": 0.43151561170744174, "learning_rate": 3.6923972501029996e-06, "loss": 0.5028, "step": 1253 }, { "epoch": 0.6062364031907179, "grad_norm": 0.4168861988285146, "learning_rate": 3.684683253674583e-06, "loss": 0.5249, "step": 1254 }, { "epoch": 0.6067198452985255, "grad_norm": 0.4341349343804055, "learning_rate": 3.676972619301776e-06, "loss": 0.5119, "step": 1255 }, { "epoch": 0.6072032874063331, "grad_norm": 0.45716952276328937, "learning_rate": 3.6692653666935875e-06, "loss": 0.5262, "step": 1256 }, { "epoch": 0.6076867295141407, "grad_norm": 0.4304219021067875, "learning_rate": 3.6615615155503703e-06, "loss": 0.5168, "step": 1257 }, { "epoch": 0.6081701716219483, "grad_norm": 0.4098861167543977, "learning_rate": 3.6538610855637953e-06, "loss": 0.5193, "step": 1258 }, { "epoch": 0.6086536137297559, "grad_norm": 0.45344600794993284, "learning_rate": 3.6461640964167755e-06, "loss": 0.5213, "step": 1259 }, { "epoch": 0.6091370558375635, "grad_norm": 0.408785994340013, "learning_rate": 3.638470567783442e-06, "loss": 0.4982, "step": 1260 }, { "epoch": 0.609620497945371, "grad_norm": 0.4481815917202834, "learning_rate": 3.630780519329069e-06, "loss": 0.5329, "step": 1261 }, { "epoch": 0.6101039400531786, "grad_norm": 0.4271346926738626, "learning_rate": 3.623093970710043e-06, "loss": 0.5278, "step": 1262 }, { "epoch": 0.6105873821609862, "grad_norm": 0.4443519643294819, "learning_rate": 3.615410941573799e-06, "loss": 0.5358, "step": 1263 }, { "epoch": 0.6110708242687938, "grad_norm": 0.41093410035963396, "learning_rate": 3.607731451558783e-06, "loss": 0.4973, "step": 1264 }, { "epoch": 0.6115542663766014, "grad_norm": 0.4526917508007144, "learning_rate": 3.6000555202943872e-06, "loss": 0.5223, "step": 1265 }, { "epoch": 0.612037708484409, "grad_norm": 0.458600770079082, "learning_rate": 3.59238316740091e-06, "loss": 0.5206, "step": 1266 }, { "epoch": 0.6125211505922166, "grad_norm": 0.4390522720994981, "learning_rate": 3.584714412489506e-06, "loss": 0.5306, "step": 1267 }, { "epoch": 0.6130045927000242, "grad_norm": 0.41799452227515504, "learning_rate": 3.5770492751621292e-06, "loss": 0.5029, "step": 1268 }, { "epoch": 0.6134880348078318, "grad_norm": 0.43636835439611227, "learning_rate": 3.5693877750114903e-06, "loss": 0.5167, "step": 1269 }, { "epoch": 0.6139714769156394, "grad_norm": 0.43211619164840076, "learning_rate": 3.5617299316209984e-06, "loss": 0.5049, "step": 1270 }, { "epoch": 0.614454919023447, "grad_norm": 0.42547953690955836, "learning_rate": 3.5540757645647217e-06, "loss": 0.4939, "step": 1271 }, { "epoch": 0.6149383611312546, "grad_norm": 0.4170182860561763, "learning_rate": 3.546425293407324e-06, "loss": 0.5199, "step": 1272 }, { "epoch": 0.6154218032390621, "grad_norm": 0.4222424350681242, "learning_rate": 3.5387785377040316e-06, "loss": 0.5132, "step": 1273 }, { "epoch": 0.6159052453468697, "grad_norm": 0.4552864030500758, "learning_rate": 3.531135517000561e-06, "loss": 0.5269, "step": 1274 }, { "epoch": 0.6163886874546773, "grad_norm": 0.43855551812482985, "learning_rate": 3.523496250833098e-06, "loss": 0.5122, "step": 1275 }, { "epoch": 0.6168721295624849, "grad_norm": 0.46449768544610603, "learning_rate": 3.515860758728214e-06, "loss": 0.5234, "step": 1276 }, { "epoch": 0.6173555716702925, "grad_norm": 0.46363731713711515, "learning_rate": 3.5082290602028492e-06, "loss": 0.5269, "step": 1277 }, { "epoch": 0.6178390137781, "grad_norm": 0.42830833200680596, "learning_rate": 3.5006011747642366e-06, "loss": 0.5177, "step": 1278 }, { "epoch": 0.6183224558859076, "grad_norm": 0.45403297568672957, "learning_rate": 3.492977121909865e-06, "loss": 0.5329, "step": 1279 }, { "epoch": 0.6188058979937152, "grad_norm": 0.4707193517755616, "learning_rate": 3.4853569211274306e-06, "loss": 0.5275, "step": 1280 }, { "epoch": 0.6192893401015228, "grad_norm": 0.4382585499371175, "learning_rate": 3.4777405918947795e-06, "loss": 0.5117, "step": 1281 }, { "epoch": 0.6197727822093304, "grad_norm": 0.43247243116721396, "learning_rate": 3.4701281536798638e-06, "loss": 0.5274, "step": 1282 }, { "epoch": 0.620256224317138, "grad_norm": 0.4457194202455219, "learning_rate": 3.462519625940688e-06, "loss": 0.5282, "step": 1283 }, { "epoch": 0.6207396664249456, "grad_norm": 0.45208181214242377, "learning_rate": 3.4549150281252635e-06, "loss": 0.5224, "step": 1284 }, { "epoch": 0.6212231085327532, "grad_norm": 0.43419084686544124, "learning_rate": 3.4473143796715537e-06, "loss": 0.5221, "step": 1285 }, { "epoch": 0.6217065506405608, "grad_norm": 0.45834613229050314, "learning_rate": 3.4397177000074307e-06, "loss": 0.5286, "step": 1286 }, { "epoch": 0.6221899927483684, "grad_norm": 0.4551713196802731, "learning_rate": 3.4321250085506174e-06, "loss": 0.519, "step": 1287 }, { "epoch": 0.622673434856176, "grad_norm": 0.45716705644935435, "learning_rate": 3.4245363247086477e-06, "loss": 0.5291, "step": 1288 }, { "epoch": 0.6231568769639836, "grad_norm": 0.4214582611290155, "learning_rate": 3.4169516678788096e-06, "loss": 0.5084, "step": 1289 }, { "epoch": 0.6236403190717912, "grad_norm": 0.43304150590207136, "learning_rate": 3.4093710574480926e-06, "loss": 0.5181, "step": 1290 }, { "epoch": 0.6241237611795988, "grad_norm": 0.4201723521255349, "learning_rate": 3.4017945127931517e-06, "loss": 0.5215, "step": 1291 }, { "epoch": 0.6246072032874064, "grad_norm": 0.43513341637312203, "learning_rate": 3.394222053280245e-06, "loss": 0.5219, "step": 1292 }, { "epoch": 0.625090645395214, "grad_norm": 0.41778354176218346, "learning_rate": 3.386653698265189e-06, "loss": 0.5295, "step": 1293 }, { "epoch": 0.6255740875030215, "grad_norm": 0.4289158469228602, "learning_rate": 3.3790894670933096e-06, "loss": 0.4993, "step": 1294 }, { "epoch": 0.626057529610829, "grad_norm": 0.4362452363722638, "learning_rate": 3.3715293790993906e-06, "loss": 0.5212, "step": 1295 }, { "epoch": 0.6265409717186367, "grad_norm": 0.4287020140556269, "learning_rate": 3.3639734536076263e-06, "loss": 0.5145, "step": 1296 }, { "epoch": 0.6270244138264442, "grad_norm": 0.4371619386426176, "learning_rate": 3.356421709931573e-06, "loss": 0.5263, "step": 1297 }, { "epoch": 0.6275078559342518, "grad_norm": 0.4558196176944881, "learning_rate": 3.348874167374093e-06, "loss": 0.5193, "step": 1298 }, { "epoch": 0.6279912980420594, "grad_norm": 0.433400379634257, "learning_rate": 3.341330845227316e-06, "loss": 0.5342, "step": 1299 }, { "epoch": 0.628474740149867, "grad_norm": 0.44874534866829735, "learning_rate": 3.33379176277258e-06, "loss": 0.5192, "step": 1300 }, { "epoch": 0.6289581822576746, "grad_norm": 0.42814892447345076, "learning_rate": 3.326256939280389e-06, "loss": 0.5222, "step": 1301 }, { "epoch": 0.6294416243654822, "grad_norm": 0.4175511142456795, "learning_rate": 3.3187263940103587e-06, "loss": 0.5008, "step": 1302 }, { "epoch": 0.6299250664732898, "grad_norm": 0.43012465279292367, "learning_rate": 3.3112001462111666e-06, "loss": 0.5252, "step": 1303 }, { "epoch": 0.6304085085810974, "grad_norm": 0.4343943982087317, "learning_rate": 3.3036782151205134e-06, "loss": 0.5269, "step": 1304 }, { "epoch": 0.630891950688905, "grad_norm": 0.4399879585469454, "learning_rate": 3.296160619965056e-06, "loss": 0.5348, "step": 1305 }, { "epoch": 0.6313753927967126, "grad_norm": 0.41469773355380823, "learning_rate": 3.2886473799603793e-06, "loss": 0.5315, "step": 1306 }, { "epoch": 0.6318588349045202, "grad_norm": 0.4118723735223055, "learning_rate": 3.2811385143109254e-06, "loss": 0.5199, "step": 1307 }, { "epoch": 0.6323422770123278, "grad_norm": 0.42911493217824326, "learning_rate": 3.2736340422099633e-06, "loss": 0.5287, "step": 1308 }, { "epoch": 0.6328257191201354, "grad_norm": 0.42229266051589764, "learning_rate": 3.2661339828395263e-06, "loss": 0.5034, "step": 1309 }, { "epoch": 0.633309161227943, "grad_norm": 0.4171629681078508, "learning_rate": 3.2586383553703723e-06, "loss": 0.5169, "step": 1310 }, { "epoch": 0.6337926033357505, "grad_norm": 0.4335972937122555, "learning_rate": 3.2511471789619274e-06, "loss": 0.5084, "step": 1311 }, { "epoch": 0.6342760454435581, "grad_norm": 0.42465524204681926, "learning_rate": 3.2436604727622447e-06, "loss": 0.5126, "step": 1312 }, { "epoch": 0.6347594875513657, "grad_norm": 0.4075010185296818, "learning_rate": 3.2361782559079465e-06, "loss": 0.5158, "step": 1313 }, { "epoch": 0.6352429296591733, "grad_norm": 0.4308541178278502, "learning_rate": 3.228700547524184e-06, "loss": 0.5145, "step": 1314 }, { "epoch": 0.6357263717669809, "grad_norm": 0.428226473042128, "learning_rate": 3.221227366724581e-06, "loss": 0.5146, "step": 1315 }, { "epoch": 0.6362098138747885, "grad_norm": 0.4299253923140349, "learning_rate": 3.2137587326111896e-06, "loss": 0.5207, "step": 1316 }, { "epoch": 0.6366932559825961, "grad_norm": 0.4181473728080887, "learning_rate": 3.206294664274443e-06, "loss": 0.5268, "step": 1317 }, { "epoch": 0.6371766980904037, "grad_norm": 0.4474913351660176, "learning_rate": 3.198835180793097e-06, "loss": 0.5277, "step": 1318 }, { "epoch": 0.6376601401982113, "grad_norm": 0.432676302059824, "learning_rate": 3.1913803012341987e-06, "loss": 0.5195, "step": 1319 }, { "epoch": 0.6381435823060189, "grad_norm": 0.44131963361999216, "learning_rate": 3.183930044653014e-06, "loss": 0.5157, "step": 1320 }, { "epoch": 0.6386270244138265, "grad_norm": 0.4300971230394046, "learning_rate": 3.176484430093007e-06, "loss": 0.5312, "step": 1321 }, { "epoch": 0.6391104665216341, "grad_norm": 0.44998544162450493, "learning_rate": 3.1690434765857604e-06, "loss": 0.523, "step": 1322 }, { "epoch": 0.6395939086294417, "grad_norm": 0.4472778564601584, "learning_rate": 3.1616072031509594e-06, "loss": 0.5155, "step": 1323 }, { "epoch": 0.6400773507372493, "grad_norm": 0.4278886331070072, "learning_rate": 3.154175628796311e-06, "loss": 0.5214, "step": 1324 }, { "epoch": 0.6405607928450568, "grad_norm": 0.4354238172365612, "learning_rate": 3.146748772517523e-06, "loss": 0.5202, "step": 1325 }, { "epoch": 0.6410442349528644, "grad_norm": 0.48506314375368076, "learning_rate": 3.139326653298236e-06, "loss": 0.5226, "step": 1326 }, { "epoch": 0.6415276770606719, "grad_norm": 0.42689588677759766, "learning_rate": 3.1319092901099847e-06, "loss": 0.5192, "step": 1327 }, { "epoch": 0.6420111191684795, "grad_norm": 0.44087925863432936, "learning_rate": 3.1244967019121496e-06, "loss": 0.5127, "step": 1328 }, { "epoch": 0.6424945612762871, "grad_norm": 0.42605880529211515, "learning_rate": 3.117088907651902e-06, "loss": 0.5176, "step": 1329 }, { "epoch": 0.6429780033840947, "grad_norm": 0.42151112378627853, "learning_rate": 3.109685926264161e-06, "loss": 0.5165, "step": 1330 }, { "epoch": 0.6434614454919023, "grad_norm": 0.4268979096606091, "learning_rate": 3.102287776671544e-06, "loss": 0.5339, "step": 1331 }, { "epoch": 0.6439448875997099, "grad_norm": 0.4392831430463705, "learning_rate": 3.094894477784318e-06, "loss": 0.5166, "step": 1332 }, { "epoch": 0.6444283297075175, "grad_norm": 0.4146076328411298, "learning_rate": 3.0875060485003496e-06, "loss": 0.5274, "step": 1333 }, { "epoch": 0.6449117718153251, "grad_norm": 0.42157017978455763, "learning_rate": 3.080122507705062e-06, "loss": 0.5243, "step": 1334 }, { "epoch": 0.6453952139231327, "grad_norm": 0.41448886842899935, "learning_rate": 3.0727438742713766e-06, "loss": 0.4982, "step": 1335 }, { "epoch": 0.6458786560309403, "grad_norm": 0.4311221980804021, "learning_rate": 3.0653701670596805e-06, "loss": 0.5222, "step": 1336 }, { "epoch": 0.6463620981387479, "grad_norm": 0.42860321112689415, "learning_rate": 3.0580014049177566e-06, "loss": 0.5203, "step": 1337 }, { "epoch": 0.6468455402465555, "grad_norm": 0.42363404336164073, "learning_rate": 3.0506376066807632e-06, "loss": 0.5131, "step": 1338 }, { "epoch": 0.6473289823543631, "grad_norm": 0.4352925336415172, "learning_rate": 3.0432787911711553e-06, "loss": 0.5244, "step": 1339 }, { "epoch": 0.6478124244621707, "grad_norm": 0.4213888881156135, "learning_rate": 3.0359249771986605e-06, "loss": 0.5114, "step": 1340 }, { "epoch": 0.6482958665699783, "grad_norm": 0.44310700039649703, "learning_rate": 3.028576183560221e-06, "loss": 0.5345, "step": 1341 }, { "epoch": 0.6487793086777859, "grad_norm": 0.46384351518694394, "learning_rate": 3.021232429039944e-06, "loss": 0.5103, "step": 1342 }, { "epoch": 0.6492627507855935, "grad_norm": 0.420074285156663, "learning_rate": 3.01389373240906e-06, "loss": 0.5307, "step": 1343 }, { "epoch": 0.649746192893401, "grad_norm": 0.46717936599561755, "learning_rate": 3.006560112425867e-06, "loss": 0.5146, "step": 1344 }, { "epoch": 0.6502296350012086, "grad_norm": 0.4411181920960548, "learning_rate": 2.999231587835691e-06, "loss": 0.5113, "step": 1345 }, { "epoch": 0.6507130771090162, "grad_norm": 0.4353633086273451, "learning_rate": 2.9919081773708293e-06, "loss": 0.5195, "step": 1346 }, { "epoch": 0.6511965192168238, "grad_norm": 0.4555577360086876, "learning_rate": 2.9845898997505102e-06, "loss": 0.5201, "step": 1347 }, { "epoch": 0.6516799613246314, "grad_norm": 0.43234112941806857, "learning_rate": 2.9772767736808406e-06, "loss": 0.5194, "step": 1348 }, { "epoch": 0.652163403432439, "grad_norm": 0.43499867909496204, "learning_rate": 2.9699688178547615e-06, "loss": 0.5252, "step": 1349 }, { "epoch": 0.6526468455402465, "grad_norm": 0.4243771527145887, "learning_rate": 2.962666050951997e-06, "loss": 0.5122, "step": 1350 }, { "epoch": 0.6531302876480541, "grad_norm": 0.4158873835022681, "learning_rate": 2.9553684916390053e-06, "loss": 0.5092, "step": 1351 }, { "epoch": 0.6536137297558617, "grad_norm": 0.4494940146550669, "learning_rate": 2.948076158568939e-06, "loss": 0.5256, "step": 1352 }, { "epoch": 0.6540971718636693, "grad_norm": 0.39860657725442444, "learning_rate": 2.940789070381587e-06, "loss": 0.5001, "step": 1353 }, { "epoch": 0.6545806139714769, "grad_norm": 0.41241371956595596, "learning_rate": 2.933507245703335e-06, "loss": 0.5174, "step": 1354 }, { "epoch": 0.6550640560792845, "grad_norm": 0.41370768103275585, "learning_rate": 2.9262307031471132e-06, "loss": 0.5142, "step": 1355 }, { "epoch": 0.6555474981870921, "grad_norm": 0.4457235721009106, "learning_rate": 2.918959461312353e-06, "loss": 0.5212, "step": 1356 }, { "epoch": 0.6560309402948997, "grad_norm": 0.44189033261031596, "learning_rate": 2.911693538784931e-06, "loss": 0.5315, "step": 1357 }, { "epoch": 0.6565143824027073, "grad_norm": 0.417441027459776, "learning_rate": 2.904432954137136e-06, "loss": 0.5197, "step": 1358 }, { "epoch": 0.6569978245105149, "grad_norm": 0.41446617019595194, "learning_rate": 2.897177725927599e-06, "loss": 0.4977, "step": 1359 }, { "epoch": 0.6574812666183224, "grad_norm": 0.424230216094722, "learning_rate": 2.889927872701278e-06, "loss": 0.5319, "step": 1360 }, { "epoch": 0.65796470872613, "grad_norm": 0.42985260891150956, "learning_rate": 2.8826834129893755e-06, "loss": 0.5166, "step": 1361 }, { "epoch": 0.6584481508339376, "grad_norm": 0.38447115699767576, "learning_rate": 2.8754443653093186e-06, "loss": 0.4786, "step": 1362 }, { "epoch": 0.6589315929417452, "grad_norm": 0.4044201835297723, "learning_rate": 2.8682107481646915e-06, "loss": 0.5216, "step": 1363 }, { "epoch": 0.6594150350495528, "grad_norm": 0.4022645350617216, "learning_rate": 2.8609825800452063e-06, "loss": 0.4988, "step": 1364 }, { "epoch": 0.6598984771573604, "grad_norm": 0.4401229027571195, "learning_rate": 2.853759879426644e-06, "loss": 0.5181, "step": 1365 }, { "epoch": 0.660381919265168, "grad_norm": 0.42349548120906483, "learning_rate": 2.8465426647708067e-06, "loss": 0.5163, "step": 1366 }, { "epoch": 0.6608653613729756, "grad_norm": 0.40013997451662586, "learning_rate": 2.8393309545254776e-06, "loss": 0.5214, "step": 1367 }, { "epoch": 0.6613488034807832, "grad_norm": 0.4359244125864156, "learning_rate": 2.8321247671243695e-06, "loss": 0.5179, "step": 1368 }, { "epoch": 0.6618322455885908, "grad_norm": 0.41425895295471055, "learning_rate": 2.82492412098708e-06, "loss": 0.5081, "step": 1369 }, { "epoch": 0.6623156876963984, "grad_norm": 0.4210065663342879, "learning_rate": 2.8177290345190387e-06, "loss": 0.5194, "step": 1370 }, { "epoch": 0.662799129804206, "grad_norm": 0.4028980901393777, "learning_rate": 2.8105395261114666e-06, "loss": 0.5234, "step": 1371 }, { "epoch": 0.6632825719120136, "grad_norm": 0.4325922757476261, "learning_rate": 2.803355614141327e-06, "loss": 0.5188, "step": 1372 }, { "epoch": 0.6637660140198212, "grad_norm": 0.4308186918740408, "learning_rate": 2.7961773169712803e-06, "loss": 0.5125, "step": 1373 }, { "epoch": 0.6642494561276288, "grad_norm": 0.4211885259856405, "learning_rate": 2.7890046529496284e-06, "loss": 0.5233, "step": 1374 }, { "epoch": 0.6647328982354364, "grad_norm": 0.4304676159038956, "learning_rate": 2.7818376404102832e-06, "loss": 0.5188, "step": 1375 }, { "epoch": 0.665216340343244, "grad_norm": 0.4137521174014562, "learning_rate": 2.774676297672701e-06, "loss": 0.5248, "step": 1376 }, { "epoch": 0.6656997824510514, "grad_norm": 0.4389331875357886, "learning_rate": 2.7675206430418542e-06, "loss": 0.5265, "step": 1377 }, { "epoch": 0.666183224558859, "grad_norm": 0.46429330512304384, "learning_rate": 2.7603706948081745e-06, "loss": 0.5211, "step": 1378 }, { "epoch": 0.6666666666666666, "grad_norm": 0.4260734411731187, "learning_rate": 2.753226471247501e-06, "loss": 0.517, "step": 1379 }, { "epoch": 0.6671501087744742, "grad_norm": 0.4189810127916622, "learning_rate": 2.7460879906210485e-06, "loss": 0.5107, "step": 1380 }, { "epoch": 0.6676335508822818, "grad_norm": 0.41204910620329505, "learning_rate": 2.7389552711753477e-06, "loss": 0.5191, "step": 1381 }, { "epoch": 0.6681169929900894, "grad_norm": 0.4267680612975131, "learning_rate": 2.731828331142207e-06, "loss": 0.5128, "step": 1382 }, { "epoch": 0.668600435097897, "grad_norm": 0.42901984315752384, "learning_rate": 2.7247071887386544e-06, "loss": 0.5257, "step": 1383 }, { "epoch": 0.6690838772057046, "grad_norm": 0.4146728225846163, "learning_rate": 2.7175918621669074e-06, "loss": 0.5184, "step": 1384 }, { "epoch": 0.6695673193135122, "grad_norm": 0.4782489091382579, "learning_rate": 2.7104823696143136e-06, "loss": 0.5298, "step": 1385 }, { "epoch": 0.6700507614213198, "grad_norm": 0.40703765978893935, "learning_rate": 2.70337872925331e-06, "loss": 0.5111, "step": 1386 }, { "epoch": 0.6705342035291274, "grad_norm": 0.4109547447766556, "learning_rate": 2.6962809592413726e-06, "loss": 0.5002, "step": 1387 }, { "epoch": 0.671017645636935, "grad_norm": 0.4498968198632276, "learning_rate": 2.6891890777209696e-06, "loss": 0.5256, "step": 1388 }, { "epoch": 0.6715010877447426, "grad_norm": 0.42208190857564254, "learning_rate": 2.68210310281953e-06, "loss": 0.5193, "step": 1389 }, { "epoch": 0.6719845298525502, "grad_norm": 0.41822528698390377, "learning_rate": 2.67502305264937e-06, "loss": 0.5163, "step": 1390 }, { "epoch": 0.6724679719603578, "grad_norm": 0.4218034674050614, "learning_rate": 2.667948945307674e-06, "loss": 0.5174, "step": 1391 }, { "epoch": 0.6729514140681654, "grad_norm": 0.4206471334382422, "learning_rate": 2.6608807988764252e-06, "loss": 0.4936, "step": 1392 }, { "epoch": 0.6734348561759729, "grad_norm": 0.42181885072694014, "learning_rate": 2.653818631422378e-06, "loss": 0.5138, "step": 1393 }, { "epoch": 0.6739182982837805, "grad_norm": 0.4261589725068296, "learning_rate": 2.6467624609970005e-06, "loss": 0.5145, "step": 1394 }, { "epoch": 0.6744017403915881, "grad_norm": 0.40519700853309554, "learning_rate": 2.6397123056364364e-06, "loss": 0.5013, "step": 1395 }, { "epoch": 0.6748851824993957, "grad_norm": 0.4510436140721377, "learning_rate": 2.6326681833614464e-06, "loss": 0.5184, "step": 1396 }, { "epoch": 0.6753686246072033, "grad_norm": 0.45003681113297744, "learning_rate": 2.6256301121773775e-06, "loss": 0.5149, "step": 1397 }, { "epoch": 0.6758520667150109, "grad_norm": 0.4263810181960221, "learning_rate": 2.618598110074105e-06, "loss": 0.5115, "step": 1398 }, { "epoch": 0.6763355088228185, "grad_norm": 0.4263005297393967, "learning_rate": 2.6115721950259977e-06, "loss": 0.5243, "step": 1399 }, { "epoch": 0.676818950930626, "grad_norm": 0.4563967376255983, "learning_rate": 2.6045523849918553e-06, "loss": 0.5314, "step": 1400 }, { "epoch": 0.6773023930384336, "grad_norm": 0.4478593986013541, "learning_rate": 2.5975386979148792e-06, "loss": 0.5179, "step": 1401 }, { "epoch": 0.6777858351462412, "grad_norm": 0.40707261007936574, "learning_rate": 2.590531151722622e-06, "loss": 0.5165, "step": 1402 }, { "epoch": 0.6782692772540488, "grad_norm": 0.45689999209163507, "learning_rate": 2.5835297643269326e-06, "loss": 0.5212, "step": 1403 }, { "epoch": 0.6787527193618564, "grad_norm": 0.41521250872284, "learning_rate": 2.576534553623925e-06, "loss": 0.5197, "step": 1404 }, { "epoch": 0.679236161469664, "grad_norm": 0.3969654860159799, "learning_rate": 2.5695455374939147e-06, "loss": 0.4939, "step": 1405 }, { "epoch": 0.6797196035774716, "grad_norm": 0.4115250925249713, "learning_rate": 2.5625627338014004e-06, "loss": 0.5242, "step": 1406 }, { "epoch": 0.6802030456852792, "grad_norm": 0.4253454941567133, "learning_rate": 2.5555861603949832e-06, "loss": 0.513, "step": 1407 }, { "epoch": 0.6806864877930868, "grad_norm": 0.4448844424181978, "learning_rate": 2.548615835107352e-06, "loss": 0.5047, "step": 1408 }, { "epoch": 0.6811699299008944, "grad_norm": 0.41222858577096244, "learning_rate": 2.5416517757552157e-06, "loss": 0.5286, "step": 1409 }, { "epoch": 0.6816533720087019, "grad_norm": 0.42285086542458045, "learning_rate": 2.534694000139273e-06, "loss": 0.5169, "step": 1410 }, { "epoch": 0.6821368141165095, "grad_norm": 0.4122433378845125, "learning_rate": 2.5277425260441616e-06, "loss": 0.515, "step": 1411 }, { "epoch": 0.6826202562243171, "grad_norm": 0.4362061175188878, "learning_rate": 2.520797371238406e-06, "loss": 0.5225, "step": 1412 }, { "epoch": 0.6831036983321247, "grad_norm": 0.4411789430289944, "learning_rate": 2.513858553474382e-06, "loss": 0.5191, "step": 1413 }, { "epoch": 0.6835871404399323, "grad_norm": 0.4415744443134195, "learning_rate": 2.506926090488269e-06, "loss": 0.5306, "step": 1414 }, { "epoch": 0.6840705825477399, "grad_norm": 0.4477316137829116, "learning_rate": 2.5000000000000015e-06, "loss": 0.5248, "step": 1415 }, { "epoch": 0.6845540246555475, "grad_norm": 0.41099572818531255, "learning_rate": 2.4930802997132213e-06, "loss": 0.5218, "step": 1416 }, { "epoch": 0.6850374667633551, "grad_norm": 0.4009913145578469, "learning_rate": 2.486167007315243e-06, "loss": 0.5189, "step": 1417 }, { "epoch": 0.6855209088711627, "grad_norm": 0.43880257019064667, "learning_rate": 2.479260140476999e-06, "loss": 0.5114, "step": 1418 }, { "epoch": 0.6860043509789703, "grad_norm": 0.4322007294880164, "learning_rate": 2.4723597168529984e-06, "loss": 0.5066, "step": 1419 }, { "epoch": 0.6864877930867779, "grad_norm": 0.4106120224272021, "learning_rate": 2.465465754081277e-06, "loss": 0.4888, "step": 1420 }, { "epoch": 0.6869712351945855, "grad_norm": 0.422067985874925, "learning_rate": 2.458578269783364e-06, "loss": 0.5155, "step": 1421 }, { "epoch": 0.6874546773023931, "grad_norm": 0.4136266956566046, "learning_rate": 2.4516972815642166e-06, "loss": 0.5143, "step": 1422 }, { "epoch": 0.6879381194102007, "grad_norm": 0.4335536983962682, "learning_rate": 2.444822807012204e-06, "loss": 0.5196, "step": 1423 }, { "epoch": 0.6884215615180083, "grad_norm": 0.42723749184962806, "learning_rate": 2.4379548636990343e-06, "loss": 0.5136, "step": 1424 }, { "epoch": 0.6889050036258159, "grad_norm": 0.4307011628135296, "learning_rate": 2.4310934691797207e-06, "loss": 0.5305, "step": 1425 }, { "epoch": 0.6893884457336233, "grad_norm": 0.45161428649005025, "learning_rate": 2.4242386409925435e-06, "loss": 0.5048, "step": 1426 }, { "epoch": 0.6898718878414309, "grad_norm": 0.4351186095813856, "learning_rate": 2.4173903966589957e-06, "loss": 0.5216, "step": 1427 }, { "epoch": 0.6903553299492385, "grad_norm": 0.4128958039987362, "learning_rate": 2.410548753683743e-06, "loss": 0.5206, "step": 1428 }, { "epoch": 0.6908387720570461, "grad_norm": 0.4185374425485222, "learning_rate": 2.4037137295545737e-06, "loss": 0.5205, "step": 1429 }, { "epoch": 0.6913222141648537, "grad_norm": 0.4256083734187945, "learning_rate": 2.396885341742361e-06, "loss": 0.4804, "step": 1430 }, { "epoch": 0.6918056562726613, "grad_norm": 0.411514639053229, "learning_rate": 2.390063607701016e-06, "loss": 0.5194, "step": 1431 }, { "epoch": 0.6922890983804689, "grad_norm": 0.43241514860902464, "learning_rate": 2.3832485448674407e-06, "loss": 0.53, "step": 1432 }, { "epoch": 0.6927725404882765, "grad_norm": 0.4291596725507727, "learning_rate": 2.3764401706614832e-06, "loss": 0.5144, "step": 1433 }, { "epoch": 0.6932559825960841, "grad_norm": 0.42041788788695633, "learning_rate": 2.369638502485897e-06, "loss": 0.5148, "step": 1434 }, { "epoch": 0.6937394247038917, "grad_norm": 0.4482987713314786, "learning_rate": 2.3628435577262947e-06, "loss": 0.5191, "step": 1435 }, { "epoch": 0.6942228668116993, "grad_norm": 0.42573448798758273, "learning_rate": 2.3560553537511043e-06, "loss": 0.5021, "step": 1436 }, { "epoch": 0.6947063089195069, "grad_norm": 0.41739963072931596, "learning_rate": 2.3492739079115214e-06, "loss": 0.5061, "step": 1437 }, { "epoch": 0.6951897510273145, "grad_norm": 0.4366261411331466, "learning_rate": 2.3424992375414655e-06, "loss": 0.5133, "step": 1438 }, { "epoch": 0.6956731931351221, "grad_norm": 0.42225675860612266, "learning_rate": 2.3357313599575422e-06, "loss": 0.5254, "step": 1439 }, { "epoch": 0.6961566352429297, "grad_norm": 0.4347650420428982, "learning_rate": 2.3289702924589914e-06, "loss": 0.5143, "step": 1440 }, { "epoch": 0.6966400773507373, "grad_norm": 0.4220266027824235, "learning_rate": 2.3222160523276486e-06, "loss": 0.5194, "step": 1441 }, { "epoch": 0.6971235194585449, "grad_norm": 0.400495176856287, "learning_rate": 2.3154686568278933e-06, "loss": 0.5315, "step": 1442 }, { "epoch": 0.6976069615663524, "grad_norm": 0.4149083634198192, "learning_rate": 2.3087281232066134e-06, "loss": 0.5109, "step": 1443 }, { "epoch": 0.69809040367416, "grad_norm": 0.43831779922906355, "learning_rate": 2.3019944686931554e-06, "loss": 0.5256, "step": 1444 }, { "epoch": 0.6985738457819676, "grad_norm": 0.4379300687242213, "learning_rate": 2.2952677104992855e-06, "loss": 0.5287, "step": 1445 }, { "epoch": 0.6990572878897752, "grad_norm": 0.43973213205463885, "learning_rate": 2.2885478658191364e-06, "loss": 0.5192, "step": 1446 }, { "epoch": 0.6995407299975828, "grad_norm": 0.42002084857343974, "learning_rate": 2.281834951829174e-06, "loss": 0.521, "step": 1447 }, { "epoch": 0.7000241721053904, "grad_norm": 0.38595076036167364, "learning_rate": 2.2751289856881487e-06, "loss": 0.4869, "step": 1448 }, { "epoch": 0.700507614213198, "grad_norm": 0.436647846778714, "learning_rate": 2.268429984537048e-06, "loss": 0.5216, "step": 1449 }, { "epoch": 0.7009910563210056, "grad_norm": 0.4140253730185284, "learning_rate": 2.2617379654990623e-06, "loss": 0.5165, "step": 1450 }, { "epoch": 0.7014744984288132, "grad_norm": 0.4644944125638521, "learning_rate": 2.255052945679525e-06, "loss": 0.5183, "step": 1451 }, { "epoch": 0.7019579405366208, "grad_norm": 0.41536119938345195, "learning_rate": 2.248374942165894e-06, "loss": 0.5231, "step": 1452 }, { "epoch": 0.7024413826444283, "grad_norm": 0.4012349549582878, "learning_rate": 2.241703972027679e-06, "loss": 0.5168, "step": 1453 }, { "epoch": 0.7029248247522359, "grad_norm": 0.4521292215779327, "learning_rate": 2.23504005231642e-06, "loss": 0.5158, "step": 1454 }, { "epoch": 0.7034082668600435, "grad_norm": 0.4172271643387044, "learning_rate": 2.2283832000656304e-06, "loss": 0.4941, "step": 1455 }, { "epoch": 0.7038917089678511, "grad_norm": 0.421958406666486, "learning_rate": 2.221733432290762e-06, "loss": 0.5209, "step": 1456 }, { "epoch": 0.7043751510756587, "grad_norm": 0.42224698163781604, "learning_rate": 2.2150907659891566e-06, "loss": 0.5173, "step": 1457 }, { "epoch": 0.7048585931834663, "grad_norm": 0.43523243642666853, "learning_rate": 2.2084552181400087e-06, "loss": 0.5186, "step": 1458 }, { "epoch": 0.7053420352912738, "grad_norm": 0.4437233504227722, "learning_rate": 2.201826805704308e-06, "loss": 0.5125, "step": 1459 }, { "epoch": 0.7058254773990814, "grad_norm": 0.42532048824174346, "learning_rate": 2.195205545624813e-06, "loss": 0.5243, "step": 1460 }, { "epoch": 0.706308919506889, "grad_norm": 0.4322950043512432, "learning_rate": 2.188591454826e-06, "loss": 0.5135, "step": 1461 }, { "epoch": 0.7067923616146966, "grad_norm": 0.4272575345234204, "learning_rate": 2.181984550214015e-06, "loss": 0.5116, "step": 1462 }, { "epoch": 0.7072758037225042, "grad_norm": 0.41921770884395154, "learning_rate": 2.175384848676639e-06, "loss": 0.5165, "step": 1463 }, { "epoch": 0.7077592458303118, "grad_norm": 0.43176187181049736, "learning_rate": 2.168792367083243e-06, "loss": 0.5138, "step": 1464 }, { "epoch": 0.7082426879381194, "grad_norm": 0.41695232513283254, "learning_rate": 2.162207122284742e-06, "loss": 0.5091, "step": 1465 }, { "epoch": 0.708726130045927, "grad_norm": 0.41339935320490057, "learning_rate": 2.155629131113549e-06, "loss": 0.5158, "step": 1466 }, { "epoch": 0.7092095721537346, "grad_norm": 0.40689486411834114, "learning_rate": 2.1490584103835433e-06, "loss": 0.4847, "step": 1467 }, { "epoch": 0.7096930142615422, "grad_norm": 0.417060588337446, "learning_rate": 2.142494976890011e-06, "loss": 0.5241, "step": 1468 }, { "epoch": 0.7101764563693498, "grad_norm": 0.4289677663647557, "learning_rate": 2.135938847409625e-06, "loss": 0.5206, "step": 1469 }, { "epoch": 0.7106598984771574, "grad_norm": 0.43410470718447147, "learning_rate": 2.1293900387003742e-06, "loss": 0.4931, "step": 1470 }, { "epoch": 0.711143340584965, "grad_norm": 0.42958196993128944, "learning_rate": 2.1228485675015455e-06, "loss": 0.5204, "step": 1471 }, { "epoch": 0.7116267826927726, "grad_norm": 0.4311771692424152, "learning_rate": 2.1163144505336634e-06, "loss": 0.5219, "step": 1472 }, { "epoch": 0.7121102248005802, "grad_norm": 0.4150104118521869, "learning_rate": 2.109787704498459e-06, "loss": 0.519, "step": 1473 }, { "epoch": 0.7125936669083878, "grad_norm": 0.43013467795196153, "learning_rate": 2.1032683460788223e-06, "loss": 0.4979, "step": 1474 }, { "epoch": 0.7130771090161954, "grad_norm": 0.4303795815833922, "learning_rate": 2.0967563919387563e-06, "loss": 0.5256, "step": 1475 }, { "epoch": 0.7135605511240029, "grad_norm": 0.4386538663824397, "learning_rate": 2.0902518587233418e-06, "loss": 0.5195, "step": 1476 }, { "epoch": 0.7140439932318104, "grad_norm": 0.41141211228553354, "learning_rate": 2.08375476305869e-06, "loss": 0.5238, "step": 1477 }, { "epoch": 0.714527435339618, "grad_norm": 0.3832973623968104, "learning_rate": 2.077265121551903e-06, "loss": 0.4914, "step": 1478 }, { "epoch": 0.7150108774474256, "grad_norm": 0.4396380345403612, "learning_rate": 2.0707829507910237e-06, "loss": 0.5224, "step": 1479 }, { "epoch": 0.7154943195552332, "grad_norm": 0.4084969868928133, "learning_rate": 2.0643082673450053e-06, "loss": 0.5214, "step": 1480 }, { "epoch": 0.7159777616630408, "grad_norm": 0.41940449704789057, "learning_rate": 2.05784108776366e-06, "loss": 0.5098, "step": 1481 }, { "epoch": 0.7164612037708484, "grad_norm": 0.4368606150106444, "learning_rate": 2.051381428577622e-06, "loss": 0.5213, "step": 1482 }, { "epoch": 0.716944645878656, "grad_norm": 0.4475169176125263, "learning_rate": 2.044929306298298e-06, "loss": 0.5169, "step": 1483 }, { "epoch": 0.7174280879864636, "grad_norm": 0.4192404761939798, "learning_rate": 2.0384847374178346e-06, "loss": 0.5214, "step": 1484 }, { "epoch": 0.7179115300942712, "grad_norm": 0.4000794067095613, "learning_rate": 2.0320477384090665e-06, "loss": 0.5002, "step": 1485 }, { "epoch": 0.7183949722020788, "grad_norm": 0.4083964682274076, "learning_rate": 2.0256183257254837e-06, "loss": 0.5057, "step": 1486 }, { "epoch": 0.7188784143098864, "grad_norm": 0.4286205023949667, "learning_rate": 2.0191965158011854e-06, "loss": 0.4815, "step": 1487 }, { "epoch": 0.719361856417694, "grad_norm": 0.40907099979637535, "learning_rate": 2.012782325050831e-06, "loss": 0.5283, "step": 1488 }, { "epoch": 0.7198452985255016, "grad_norm": 0.41946463733283473, "learning_rate": 2.006375769869611e-06, "loss": 0.522, "step": 1489 }, { "epoch": 0.7203287406333092, "grad_norm": 0.4222854300641897, "learning_rate": 1.9999768666331974e-06, "loss": 0.5132, "step": 1490 }, { "epoch": 0.7208121827411168, "grad_norm": 0.3830302288103666, "learning_rate": 1.9935856316977044e-06, "loss": 0.4938, "step": 1491 }, { "epoch": 0.7212956248489243, "grad_norm": 0.47757660690611003, "learning_rate": 1.987202081399639e-06, "loss": 0.5251, "step": 1492 }, { "epoch": 0.7217790669567319, "grad_norm": 0.3992903621119011, "learning_rate": 1.9808262320558724e-06, "loss": 0.506, "step": 1493 }, { "epoch": 0.7222625090645395, "grad_norm": 0.41142424465140587, "learning_rate": 1.9744580999635902e-06, "loss": 0.5143, "step": 1494 }, { "epoch": 0.7227459511723471, "grad_norm": 0.4124129943865437, "learning_rate": 1.968097701400252e-06, "loss": 0.5245, "step": 1495 }, { "epoch": 0.7232293932801547, "grad_norm": 0.4312737875038871, "learning_rate": 1.9617450526235464e-06, "loss": 0.5178, "step": 1496 }, { "epoch": 0.7237128353879623, "grad_norm": 0.43509903197162936, "learning_rate": 1.9554001698713572e-06, "loss": 0.5131, "step": 1497 }, { "epoch": 0.7241962774957699, "grad_norm": 0.4260008705271214, "learning_rate": 1.949063069361717e-06, "loss": 0.5136, "step": 1498 }, { "epoch": 0.7246797196035775, "grad_norm": 0.42356802738060345, "learning_rate": 1.9427337672927632e-06, "loss": 0.5146, "step": 1499 }, { "epoch": 0.7251631617113851, "grad_norm": 0.4027997963462275, "learning_rate": 1.936412279842705e-06, "loss": 0.4913, "step": 1500 }, { "epoch": 0.7256466038191927, "grad_norm": 0.4124397793510055, "learning_rate": 1.9300986231697705e-06, "loss": 0.5175, "step": 1501 }, { "epoch": 0.7261300459270003, "grad_norm": 0.4442811918906246, "learning_rate": 1.9237928134121757e-06, "loss": 0.516, "step": 1502 }, { "epoch": 0.7266134880348079, "grad_norm": 0.4393627100062481, "learning_rate": 1.9174948666880805e-06, "loss": 0.5155, "step": 1503 }, { "epoch": 0.7270969301426154, "grad_norm": 0.43133527501756386, "learning_rate": 1.9112047990955446e-06, "loss": 0.5136, "step": 1504 }, { "epoch": 0.727580372250423, "grad_norm": 0.45322135855021595, "learning_rate": 1.9049226267124844e-06, "loss": 0.5172, "step": 1505 }, { "epoch": 0.7280638143582306, "grad_norm": 0.41078461158260915, "learning_rate": 1.8986483655966408e-06, "loss": 0.5179, "step": 1506 }, { "epoch": 0.7285472564660382, "grad_norm": 0.4178604053793329, "learning_rate": 1.8923820317855307e-06, "loss": 0.5076, "step": 1507 }, { "epoch": 0.7290306985738458, "grad_norm": 0.42623268157040256, "learning_rate": 1.8861236412964106e-06, "loss": 0.5172, "step": 1508 }, { "epoch": 0.7295141406816533, "grad_norm": 0.42835046843347674, "learning_rate": 1.879873210126229e-06, "loss": 0.5259, "step": 1509 }, { "epoch": 0.7299975827894609, "grad_norm": 0.4196504177616674, "learning_rate": 1.873630754251588e-06, "loss": 0.5177, "step": 1510 }, { "epoch": 0.7304810248972685, "grad_norm": 0.4079644120305993, "learning_rate": 1.8673962896287152e-06, "loss": 0.5201, "step": 1511 }, { "epoch": 0.7309644670050761, "grad_norm": 0.43892341061011425, "learning_rate": 1.8611698321933991e-06, "loss": 0.5186, "step": 1512 }, { "epoch": 0.7314479091128837, "grad_norm": 0.42683430911112086, "learning_rate": 1.8549513978609707e-06, "loss": 0.5111, "step": 1513 }, { "epoch": 0.7319313512206913, "grad_norm": 0.41062878136002484, "learning_rate": 1.8487410025262436e-06, "loss": 0.5103, "step": 1514 }, { "epoch": 0.7324147933284989, "grad_norm": 0.4256013874707191, "learning_rate": 1.8425386620634961e-06, "loss": 0.5167, "step": 1515 }, { "epoch": 0.7328982354363065, "grad_norm": 0.4388797350675763, "learning_rate": 1.8363443923264046e-06, "loss": 0.5125, "step": 1516 }, { "epoch": 0.7333816775441141, "grad_norm": 0.4394233254146738, "learning_rate": 1.8301582091480264e-06, "loss": 0.5217, "step": 1517 }, { "epoch": 0.7338651196519217, "grad_norm": 0.41564422037394944, "learning_rate": 1.8239801283407393e-06, "loss": 0.5164, "step": 1518 }, { "epoch": 0.7343485617597293, "grad_norm": 0.4173422643681329, "learning_rate": 1.8178101656962188e-06, "loss": 0.5205, "step": 1519 }, { "epoch": 0.7348320038675369, "grad_norm": 0.39698118648442665, "learning_rate": 1.8116483369853853e-06, "loss": 0.4835, "step": 1520 }, { "epoch": 0.7353154459753445, "grad_norm": 0.42300362992419904, "learning_rate": 1.8054946579583732e-06, "loss": 0.5143, "step": 1521 }, { "epoch": 0.7357988880831521, "grad_norm": 0.42464469919772974, "learning_rate": 1.7993491443444771e-06, "loss": 0.5129, "step": 1522 }, { "epoch": 0.7362823301909597, "grad_norm": 0.4501988280108448, "learning_rate": 1.7932118118521274e-06, "loss": 0.5131, "step": 1523 }, { "epoch": 0.7367657722987673, "grad_norm": 0.41493548901611477, "learning_rate": 1.787082676168842e-06, "loss": 0.5268, "step": 1524 }, { "epoch": 0.7372492144065748, "grad_norm": 0.4436917707906808, "learning_rate": 1.7809617529611828e-06, "loss": 0.5126, "step": 1525 }, { "epoch": 0.7377326565143824, "grad_norm": 0.39767655781448813, "learning_rate": 1.7748490578747257e-06, "loss": 0.4945, "step": 1526 }, { "epoch": 0.73821609862219, "grad_norm": 0.4281607415979641, "learning_rate": 1.7687446065340074e-06, "loss": 0.5189, "step": 1527 }, { "epoch": 0.7386995407299976, "grad_norm": 0.4123906023331037, "learning_rate": 1.7626484145425038e-06, "loss": 0.5117, "step": 1528 }, { "epoch": 0.7391829828378051, "grad_norm": 0.39861909677156787, "learning_rate": 1.7565604974825678e-06, "loss": 0.4917, "step": 1529 }, { "epoch": 0.7396664249456127, "grad_norm": 0.4164290248459804, "learning_rate": 1.7504808709154104e-06, "loss": 0.5187, "step": 1530 }, { "epoch": 0.7401498670534203, "grad_norm": 0.4149617264710624, "learning_rate": 1.744409550381041e-06, "loss": 0.529, "step": 1531 }, { "epoch": 0.7406333091612279, "grad_norm": 0.402995768205116, "learning_rate": 1.7383465513982517e-06, "loss": 0.4906, "step": 1532 }, { "epoch": 0.7411167512690355, "grad_norm": 0.4357911248878148, "learning_rate": 1.7322918894645525e-06, "loss": 0.5209, "step": 1533 }, { "epoch": 0.7416001933768431, "grad_norm": 0.4310636351470309, "learning_rate": 1.7262455800561456e-06, "loss": 0.529, "step": 1534 }, { "epoch": 0.7420836354846507, "grad_norm": 0.40110062198063573, "learning_rate": 1.7202076386278876e-06, "loss": 0.5218, "step": 1535 }, { "epoch": 0.7425670775924583, "grad_norm": 0.4044655145984996, "learning_rate": 1.7141780806132429e-06, "loss": 0.5038, "step": 1536 }, { "epoch": 0.7430505197002659, "grad_norm": 0.4169687562172726, "learning_rate": 1.70815692142425e-06, "loss": 0.5094, "step": 1537 }, { "epoch": 0.7435339618080735, "grad_norm": 0.3892005945860465, "learning_rate": 1.702144176451473e-06, "loss": 0.4909, "step": 1538 }, { "epoch": 0.7440174039158811, "grad_norm": 0.4059894671987348, "learning_rate": 1.696139861063974e-06, "loss": 0.5231, "step": 1539 }, { "epoch": 0.7445008460236887, "grad_norm": 0.4235285224343199, "learning_rate": 1.690143990609268e-06, "loss": 0.5116, "step": 1540 }, { "epoch": 0.7449842881314963, "grad_norm": 0.4066059462995061, "learning_rate": 1.6841565804132843e-06, "loss": 0.5159, "step": 1541 }, { "epoch": 0.7454677302393038, "grad_norm": 0.41374792014057904, "learning_rate": 1.6781776457803227e-06, "loss": 0.5146, "step": 1542 }, { "epoch": 0.7459511723471114, "grad_norm": 0.41330516594974576, "learning_rate": 1.6722072019930242e-06, "loss": 0.4841, "step": 1543 }, { "epoch": 0.746434614454919, "grad_norm": 0.4342078760633199, "learning_rate": 1.6662452643123234e-06, "loss": 0.5181, "step": 1544 }, { "epoch": 0.7469180565627266, "grad_norm": 0.4366803318877013, "learning_rate": 1.660291847977415e-06, "loss": 0.5056, "step": 1545 }, { "epoch": 0.7474014986705342, "grad_norm": 0.4107968782550443, "learning_rate": 1.6543469682057105e-06, "loss": 0.5102, "step": 1546 }, { "epoch": 0.7478849407783418, "grad_norm": 0.43703346533243426, "learning_rate": 1.6484106401927991e-06, "loss": 0.517, "step": 1547 }, { "epoch": 0.7483683828861494, "grad_norm": 0.4185149815126949, "learning_rate": 1.6424828791124159e-06, "loss": 0.5162, "step": 1548 }, { "epoch": 0.748851824993957, "grad_norm": 0.3941815905233016, "learning_rate": 1.6365637001163958e-06, "loss": 0.4694, "step": 1549 }, { "epoch": 0.7493352671017646, "grad_norm": 0.4069386532862478, "learning_rate": 1.6306531183346387e-06, "loss": 0.5172, "step": 1550 }, { "epoch": 0.7498187092095722, "grad_norm": 0.44449597102378385, "learning_rate": 1.624751148875065e-06, "loss": 0.5227, "step": 1551 }, { "epoch": 0.7503021513173798, "grad_norm": 0.4200070436877298, "learning_rate": 1.6188578068235855e-06, "loss": 0.5227, "step": 1552 }, { "epoch": 0.7507855934251874, "grad_norm": 0.4134676341568954, "learning_rate": 1.6129731072440586e-06, "loss": 0.5197, "step": 1553 }, { "epoch": 0.751269035532995, "grad_norm": 0.4342416540931307, "learning_rate": 1.6070970651782514e-06, "loss": 0.5234, "step": 1554 }, { "epoch": 0.7517524776408026, "grad_norm": 0.4621699665968105, "learning_rate": 1.6012296956457972e-06, "loss": 0.5224, "step": 1555 }, { "epoch": 0.7522359197486101, "grad_norm": 0.39794619123328484, "learning_rate": 1.5953710136441685e-06, "loss": 0.5222, "step": 1556 }, { "epoch": 0.7527193618564177, "grad_norm": 0.39795969856270086, "learning_rate": 1.5895210341486279e-06, "loss": 0.4697, "step": 1557 }, { "epoch": 0.7532028039642252, "grad_norm": 0.4348573897259895, "learning_rate": 1.583679772112196e-06, "loss": 0.5256, "step": 1558 }, { "epoch": 0.7536862460720328, "grad_norm": 0.4108494121358044, "learning_rate": 1.5778472424656083e-06, "loss": 0.5185, "step": 1559 }, { "epoch": 0.7541696881798404, "grad_norm": 0.41224584403564757, "learning_rate": 1.5720234601172767e-06, "loss": 0.5203, "step": 1560 }, { "epoch": 0.754653130287648, "grad_norm": 0.4348874788487397, "learning_rate": 1.566208439953265e-06, "loss": 0.5189, "step": 1561 }, { "epoch": 0.7551365723954556, "grad_norm": 0.42842919833727694, "learning_rate": 1.5604021968372286e-06, "loss": 0.5111, "step": 1562 }, { "epoch": 0.7556200145032632, "grad_norm": 0.43772492324957596, "learning_rate": 1.5546047456103964e-06, "loss": 0.5147, "step": 1563 }, { "epoch": 0.7561034566110708, "grad_norm": 0.41431446343362865, "learning_rate": 1.548816101091517e-06, "loss": 0.5149, "step": 1564 }, { "epoch": 0.7565868987188784, "grad_norm": 0.40777837421338714, "learning_rate": 1.5430362780768343e-06, "loss": 0.5117, "step": 1565 }, { "epoch": 0.757070340826686, "grad_norm": 0.4454487846070906, "learning_rate": 1.537265291340042e-06, "loss": 0.5074, "step": 1566 }, { "epoch": 0.7575537829344936, "grad_norm": 0.46396843002779686, "learning_rate": 1.531503155632249e-06, "loss": 0.5223, "step": 1567 }, { "epoch": 0.7580372250423012, "grad_norm": 0.41741600165011983, "learning_rate": 1.5257498856819353e-06, "loss": 0.5158, "step": 1568 }, { "epoch": 0.7585206671501088, "grad_norm": 0.4059061868499258, "learning_rate": 1.5200054961949233e-06, "loss": 0.5049, "step": 1569 }, { "epoch": 0.7590041092579164, "grad_norm": 0.41330390270516437, "learning_rate": 1.5142700018543382e-06, "loss": 0.5305, "step": 1570 }, { "epoch": 0.759487551365724, "grad_norm": 0.43099056056318497, "learning_rate": 1.508543417320562e-06, "loss": 0.5212, "step": 1571 }, { "epoch": 0.7599709934735316, "grad_norm": 0.39882553101049034, "learning_rate": 1.5028257572312105e-06, "loss": 0.4883, "step": 1572 }, { "epoch": 0.7604544355813392, "grad_norm": 0.4581685557000849, "learning_rate": 1.4971170362010774e-06, "loss": 0.5225, "step": 1573 }, { "epoch": 0.7609378776891468, "grad_norm": 0.4428964310587446, "learning_rate": 1.4914172688221213e-06, "loss": 0.5195, "step": 1574 }, { "epoch": 0.7614213197969543, "grad_norm": 0.4170791170307987, "learning_rate": 1.485726469663401e-06, "loss": 0.5294, "step": 1575 }, { "epoch": 0.7619047619047619, "grad_norm": 0.4212168944035229, "learning_rate": 1.4800446532710627e-06, "loss": 0.5143, "step": 1576 }, { "epoch": 0.7623882040125695, "grad_norm": 0.4317778496296824, "learning_rate": 1.4743718341682806e-06, "loss": 0.5242, "step": 1577 }, { "epoch": 0.7628716461203771, "grad_norm": 0.3887549768642727, "learning_rate": 1.468708026855245e-06, "loss": 0.4927, "step": 1578 }, { "epoch": 0.7633550882281847, "grad_norm": 0.41991973562573803, "learning_rate": 1.463053245809099e-06, "loss": 0.5248, "step": 1579 }, { "epoch": 0.7638385303359922, "grad_norm": 0.41267795471721197, "learning_rate": 1.457407505483921e-06, "loss": 0.5187, "step": 1580 }, { "epoch": 0.7643219724437998, "grad_norm": 0.44716407911896383, "learning_rate": 1.4517708203106763e-06, "loss": 0.523, "step": 1581 }, { "epoch": 0.7648054145516074, "grad_norm": 0.4254440302923612, "learning_rate": 1.446143204697187e-06, "loss": 0.5233, "step": 1582 }, { "epoch": 0.765288856659415, "grad_norm": 0.39996785018921494, "learning_rate": 1.4405246730280946e-06, "loss": 0.5172, "step": 1583 }, { "epoch": 0.7657722987672226, "grad_norm": 0.443369622770567, "learning_rate": 1.4349152396648153e-06, "loss": 0.5183, "step": 1584 }, { "epoch": 0.7662557408750302, "grad_norm": 0.40505843584897416, "learning_rate": 1.4293149189455146e-06, "loss": 0.5161, "step": 1585 }, { "epoch": 0.7667391829828378, "grad_norm": 0.4077704595280849, "learning_rate": 1.4237237251850634e-06, "loss": 0.5107, "step": 1586 }, { "epoch": 0.7672226250906454, "grad_norm": 0.40791039312028615, "learning_rate": 1.4181416726750052e-06, "loss": 0.5146, "step": 1587 }, { "epoch": 0.767706067198453, "grad_norm": 0.41705043398231784, "learning_rate": 1.4125687756835132e-06, "loss": 0.4812, "step": 1588 }, { "epoch": 0.7681895093062606, "grad_norm": 0.4235182346193989, "learning_rate": 1.4070050484553644e-06, "loss": 0.5129, "step": 1589 }, { "epoch": 0.7686729514140682, "grad_norm": 0.414137655909364, "learning_rate": 1.4014505052118893e-06, "loss": 0.5236, "step": 1590 }, { "epoch": 0.7691563935218757, "grad_norm": 0.43611300077847176, "learning_rate": 1.3959051601509537e-06, "loss": 0.5345, "step": 1591 }, { "epoch": 0.7696398356296833, "grad_norm": 0.410845648388898, "learning_rate": 1.3903690274469029e-06, "loss": 0.5115, "step": 1592 }, { "epoch": 0.7701232777374909, "grad_norm": 0.3961083948871449, "learning_rate": 1.3848421212505404e-06, "loss": 0.5168, "step": 1593 }, { "epoch": 0.7706067198452985, "grad_norm": 0.42179325369386034, "learning_rate": 1.37932445568908e-06, "loss": 0.5125, "step": 1594 }, { "epoch": 0.7710901619531061, "grad_norm": 0.4213217250215216, "learning_rate": 1.3738160448661253e-06, "loss": 0.5267, "step": 1595 }, { "epoch": 0.7715736040609137, "grad_norm": 0.4143253090473424, "learning_rate": 1.3683169028616155e-06, "loss": 0.5178, "step": 1596 }, { "epoch": 0.7720570461687213, "grad_norm": 0.4171850827541685, "learning_rate": 1.3628270437317993e-06, "loss": 0.5211, "step": 1597 }, { "epoch": 0.7725404882765289, "grad_norm": 0.39565458081679644, "learning_rate": 1.3573464815092003e-06, "loss": 0.5055, "step": 1598 }, { "epoch": 0.7730239303843365, "grad_norm": 0.4271922188091497, "learning_rate": 1.3518752302025773e-06, "loss": 0.5279, "step": 1599 }, { "epoch": 0.7735073724921441, "grad_norm": 0.4151739224827406, "learning_rate": 1.3464133037968914e-06, "loss": 0.5239, "step": 1600 }, { "epoch": 0.7739908145999517, "grad_norm": 0.3960683162461613, "learning_rate": 1.3409607162532628e-06, "loss": 0.4987, "step": 1601 }, { "epoch": 0.7744742567077593, "grad_norm": 0.43044333694614223, "learning_rate": 1.3355174815089477e-06, "loss": 0.5273, "step": 1602 }, { "epoch": 0.7749576988155669, "grad_norm": 0.4121649380386113, "learning_rate": 1.3300836134772916e-06, "loss": 0.5162, "step": 1603 }, { "epoch": 0.7754411409233745, "grad_norm": 0.4005354058641754, "learning_rate": 1.3246591260477015e-06, "loss": 0.5167, "step": 1604 }, { "epoch": 0.7759245830311821, "grad_norm": 0.3951020817933521, "learning_rate": 1.3192440330856005e-06, "loss": 0.5251, "step": 1605 }, { "epoch": 0.7764080251389897, "grad_norm": 0.42611917105831465, "learning_rate": 1.3138383484324063e-06, "loss": 0.5252, "step": 1606 }, { "epoch": 0.7768914672467973, "grad_norm": 0.40098636118444037, "learning_rate": 1.308442085905482e-06, "loss": 0.5101, "step": 1607 }, { "epoch": 0.7773749093546047, "grad_norm": 0.4404415072756006, "learning_rate": 1.30305525929811e-06, "loss": 0.5224, "step": 1608 }, { "epoch": 0.7778583514624123, "grad_norm": 0.40390400609014704, "learning_rate": 1.297677882379455e-06, "loss": 0.5191, "step": 1609 }, { "epoch": 0.7783417935702199, "grad_norm": 0.43645719023114843, "learning_rate": 1.2923099688945234e-06, "loss": 0.5096, "step": 1610 }, { "epoch": 0.7788252356780275, "grad_norm": 0.401799031041578, "learning_rate": 1.2869515325641357e-06, "loss": 0.4812, "step": 1611 }, { "epoch": 0.7793086777858351, "grad_norm": 0.40544675897829047, "learning_rate": 1.281602587084887e-06, "loss": 0.5211, "step": 1612 }, { "epoch": 0.7797921198936427, "grad_norm": 0.4166351291750946, "learning_rate": 1.2762631461291148e-06, "loss": 0.5294, "step": 1613 }, { "epoch": 0.7802755620014503, "grad_norm": 0.4334981607396633, "learning_rate": 1.2709332233448573e-06, "loss": 0.5096, "step": 1614 }, { "epoch": 0.7807590041092579, "grad_norm": 0.437984950036233, "learning_rate": 1.2656128323558286e-06, "loss": 0.5135, "step": 1615 }, { "epoch": 0.7812424462170655, "grad_norm": 0.41467240914944964, "learning_rate": 1.2603019867613764e-06, "loss": 0.5162, "step": 1616 }, { "epoch": 0.7817258883248731, "grad_norm": 0.40797210573439474, "learning_rate": 1.2550007001364518e-06, "loss": 0.5064, "step": 1617 }, { "epoch": 0.7822093304326807, "grad_norm": 0.40625079236189654, "learning_rate": 1.2497089860315675e-06, "loss": 0.5057, "step": 1618 }, { "epoch": 0.7826927725404883, "grad_norm": 0.3973135238618207, "learning_rate": 1.244426857972773e-06, "loss": 0.5125, "step": 1619 }, { "epoch": 0.7831762146482959, "grad_norm": 0.41758654400468537, "learning_rate": 1.239154329461615e-06, "loss": 0.5146, "step": 1620 }, { "epoch": 0.7836596567561035, "grad_norm": 0.4546571879884002, "learning_rate": 1.233891413975098e-06, "loss": 0.5138, "step": 1621 }, { "epoch": 0.7841430988639111, "grad_norm": 0.4501304501527847, "learning_rate": 1.228638124965661e-06, "loss": 0.5111, "step": 1622 }, { "epoch": 0.7846265409717187, "grad_norm": 0.40173574952002505, "learning_rate": 1.223394475861131e-06, "loss": 0.5134, "step": 1623 }, { "epoch": 0.7851099830795262, "grad_norm": 0.4105768174048188, "learning_rate": 1.2181604800646996e-06, "loss": 0.5092, "step": 1624 }, { "epoch": 0.7855934251873338, "grad_norm": 0.39390517153871624, "learning_rate": 1.212936150954882e-06, "loss": 0.498, "step": 1625 }, { "epoch": 0.7860768672951414, "grad_norm": 0.41453725871465896, "learning_rate": 1.207721501885486e-06, "loss": 0.5063, "step": 1626 }, { "epoch": 0.786560309402949, "grad_norm": 0.44249465126635484, "learning_rate": 1.2025165461855714e-06, "loss": 0.5212, "step": 1627 }, { "epoch": 0.7870437515107566, "grad_norm": 0.4079816768267276, "learning_rate": 1.1973212971594262e-06, "loss": 0.5155, "step": 1628 }, { "epoch": 0.7875271936185642, "grad_norm": 0.4318458945961838, "learning_rate": 1.1921357680865258e-06, "loss": 0.5183, "step": 1629 }, { "epoch": 0.7880106357263718, "grad_norm": 0.40656198305401237, "learning_rate": 1.1869599722215013e-06, "loss": 0.4949, "step": 1630 }, { "epoch": 0.7884940778341794, "grad_norm": 0.4056814293942294, "learning_rate": 1.181793922794102e-06, "loss": 0.5206, "step": 1631 }, { "epoch": 0.788977519941987, "grad_norm": 0.42895763169120843, "learning_rate": 1.1766376330091684e-06, "loss": 0.503, "step": 1632 }, { "epoch": 0.7894609620497945, "grad_norm": 0.4165970675717556, "learning_rate": 1.1714911160465924e-06, "loss": 0.5255, "step": 1633 }, { "epoch": 0.7899444041576021, "grad_norm": 0.4123917311937627, "learning_rate": 1.1663543850612847e-06, "loss": 0.5169, "step": 1634 }, { "epoch": 0.7904278462654097, "grad_norm": 0.41612583641837364, "learning_rate": 1.1612274531831463e-06, "loss": 0.4938, "step": 1635 }, { "epoch": 0.7909112883732173, "grad_norm": 0.40728900719245686, "learning_rate": 1.1561103335170242e-06, "loss": 0.5222, "step": 1636 }, { "epoch": 0.7913947304810249, "grad_norm": 0.4348645075910405, "learning_rate": 1.1510030391426941e-06, "loss": 0.5192, "step": 1637 }, { "epoch": 0.7918781725888325, "grad_norm": 0.4086546804175218, "learning_rate": 1.1459055831148074e-06, "loss": 0.5232, "step": 1638 }, { "epoch": 0.7923616146966401, "grad_norm": 0.40880965205946446, "learning_rate": 1.140817978462876e-06, "loss": 0.5212, "step": 1639 }, { "epoch": 0.7928450568044476, "grad_norm": 0.3893016631161895, "learning_rate": 1.1357402381912224e-06, "loss": 0.4873, "step": 1640 }, { "epoch": 0.7933284989122552, "grad_norm": 0.4215992969510908, "learning_rate": 1.1306723752789672e-06, "loss": 0.5211, "step": 1641 }, { "epoch": 0.7938119410200628, "grad_norm": 0.420615559845491, "learning_rate": 1.1256144026799703e-06, "loss": 0.5179, "step": 1642 }, { "epoch": 0.7942953831278704, "grad_norm": 0.39236133338098145, "learning_rate": 1.1205663333228217e-06, "loss": 0.4911, "step": 1643 }, { "epoch": 0.794778825235678, "grad_norm": 0.4158254754636244, "learning_rate": 1.1155281801107897e-06, "loss": 0.5146, "step": 1644 }, { "epoch": 0.7952622673434856, "grad_norm": 0.4092049660763265, "learning_rate": 1.1104999559218022e-06, "loss": 0.5063, "step": 1645 }, { "epoch": 0.7957457094512932, "grad_norm": 0.43121118572534733, "learning_rate": 1.1054816736084057e-06, "loss": 0.5122, "step": 1646 }, { "epoch": 0.7962291515591008, "grad_norm": 0.40574034047521074, "learning_rate": 1.1004733459977325e-06, "loss": 0.5089, "step": 1647 }, { "epoch": 0.7967125936669084, "grad_norm": 0.4343773778355907, "learning_rate": 1.0954749858914727e-06, "loss": 0.5177, "step": 1648 }, { "epoch": 0.797196035774716, "grad_norm": 0.429877165339691, "learning_rate": 1.0904866060658376e-06, "loss": 0.5211, "step": 1649 }, { "epoch": 0.7976794778825236, "grad_norm": 0.4108995062804379, "learning_rate": 1.0855082192715294e-06, "loss": 0.5174, "step": 1650 }, { "epoch": 0.7981629199903312, "grad_norm": 0.4018616150052113, "learning_rate": 1.0805398382337035e-06, "loss": 0.5049, "step": 1651 }, { "epoch": 0.7986463620981388, "grad_norm": 0.4089174910335269, "learning_rate": 1.0755814756519445e-06, "loss": 0.5226, "step": 1652 }, { "epoch": 0.7991298042059464, "grad_norm": 0.3964537076582955, "learning_rate": 1.0706331442002226e-06, "loss": 0.5095, "step": 1653 }, { "epoch": 0.799613246313754, "grad_norm": 0.4267767025207229, "learning_rate": 1.0656948565268782e-06, "loss": 0.5168, "step": 1654 }, { "epoch": 0.8000966884215616, "grad_norm": 0.41363796984886936, "learning_rate": 1.0607666252545673e-06, "loss": 0.5128, "step": 1655 }, { "epoch": 0.8005801305293692, "grad_norm": 0.43264146945425214, "learning_rate": 1.0558484629802502e-06, "loss": 0.514, "step": 1656 }, { "epoch": 0.8010635726371766, "grad_norm": 0.42544390140386235, "learning_rate": 1.0509403822751425e-06, "loss": 0.512, "step": 1657 }, { "epoch": 0.8015470147449842, "grad_norm": 0.3932679351449648, "learning_rate": 1.0460423956846955e-06, "loss": 0.4941, "step": 1658 }, { "epoch": 0.8020304568527918, "grad_norm": 0.4096876585407803, "learning_rate": 1.041154515728559e-06, "loss": 0.5088, "step": 1659 }, { "epoch": 0.8025138989605994, "grad_norm": 0.4173497731763413, "learning_rate": 1.0362767549005454e-06, "loss": 0.5119, "step": 1660 }, { "epoch": 0.802997341068407, "grad_norm": 0.41012015779324845, "learning_rate": 1.0314091256686065e-06, "loss": 0.5212, "step": 1661 }, { "epoch": 0.8034807831762146, "grad_norm": 0.38447439239259856, "learning_rate": 1.0265516404747943e-06, "loss": 0.5052, "step": 1662 }, { "epoch": 0.8039642252840222, "grad_norm": 0.40948392634706504, "learning_rate": 1.0217043117352337e-06, "loss": 0.5109, "step": 1663 }, { "epoch": 0.8044476673918298, "grad_norm": 0.40148827230751766, "learning_rate": 1.0168671518400853e-06, "loss": 0.5118, "step": 1664 }, { "epoch": 0.8049311094996374, "grad_norm": 0.3939565441232479, "learning_rate": 1.0120401731535213e-06, "loss": 0.4879, "step": 1665 }, { "epoch": 0.805414551607445, "grad_norm": 0.4394864393242481, "learning_rate": 1.0072233880136872e-06, "loss": 0.5104, "step": 1666 }, { "epoch": 0.8058979937152526, "grad_norm": 0.4318770671908104, "learning_rate": 1.0024168087326764e-06, "loss": 0.5235, "step": 1667 }, { "epoch": 0.8063814358230602, "grad_norm": 0.4281259140520081, "learning_rate": 9.976204475964907e-07, "loss": 0.5149, "step": 1668 }, { "epoch": 0.8068648779308678, "grad_norm": 0.43979946361695016, "learning_rate": 9.92834316865015e-07, "loss": 0.5191, "step": 1669 }, { "epoch": 0.8073483200386754, "grad_norm": 0.4312412015437643, "learning_rate": 9.88058428771987e-07, "loss": 0.5188, "step": 1670 }, { "epoch": 0.807831762146483, "grad_norm": 0.4461824252192259, "learning_rate": 9.832927955249605e-07, "loss": 0.518, "step": 1671 }, { "epoch": 0.8083152042542906, "grad_norm": 0.40455517199845253, "learning_rate": 9.785374293052802e-07, "loss": 0.5279, "step": 1672 }, { "epoch": 0.8087986463620981, "grad_norm": 0.40500700400967726, "learning_rate": 9.737923422680424e-07, "loss": 0.5267, "step": 1673 }, { "epoch": 0.8092820884699057, "grad_norm": 0.4053422468834684, "learning_rate": 9.690575465420733e-07, "loss": 0.5098, "step": 1674 }, { "epoch": 0.8097655305777133, "grad_norm": 0.41221923071964073, "learning_rate": 9.643330542298929e-07, "loss": 0.5171, "step": 1675 }, { "epoch": 0.8102489726855209, "grad_norm": 0.4289210188727792, "learning_rate": 9.596188774076849e-07, "loss": 0.5164, "step": 1676 }, { "epoch": 0.8107324147933285, "grad_norm": 0.4119920227929362, "learning_rate": 9.549150281252633e-07, "loss": 0.5167, "step": 1677 }, { "epoch": 0.8112158569011361, "grad_norm": 0.43146374267443927, "learning_rate": 9.50221518406047e-07, "loss": 0.5198, "step": 1678 }, { "epoch": 0.8116992990089437, "grad_norm": 0.3915995001014536, "learning_rate": 9.455383602470247e-07, "loss": 0.5194, "step": 1679 }, { "epoch": 0.8121827411167513, "grad_norm": 0.42092897815810126, "learning_rate": 9.408655656187282e-07, "loss": 0.5154, "step": 1680 }, { "epoch": 0.8126661832245589, "grad_norm": 0.43929014126287974, "learning_rate": 9.362031464651955e-07, "loss": 0.5111, "step": 1681 }, { "epoch": 0.8131496253323665, "grad_norm": 0.419403258433708, "learning_rate": 9.31551114703943e-07, "loss": 0.5175, "step": 1682 }, { "epoch": 0.813633067440174, "grad_norm": 0.4235039718034734, "learning_rate": 9.269094822259439e-07, "loss": 0.5219, "step": 1683 }, { "epoch": 0.8141165095479816, "grad_norm": 0.403949404981181, "learning_rate": 9.22278260895581e-07, "loss": 0.5257, "step": 1684 }, { "epoch": 0.8145999516557892, "grad_norm": 0.40201626032689436, "learning_rate": 9.176574625506324e-07, "loss": 0.5065, "step": 1685 }, { "epoch": 0.8150833937635968, "grad_norm": 0.42029809516611727, "learning_rate": 9.130470990022283e-07, "loss": 0.5198, "step": 1686 }, { "epoch": 0.8155668358714044, "grad_norm": 0.4443584968330059, "learning_rate": 9.084471820348306e-07, "loss": 0.5054, "step": 1687 }, { "epoch": 0.816050277979212, "grad_norm": 0.4011266291605723, "learning_rate": 9.038577234061979e-07, "loss": 0.481, "step": 1688 }, { "epoch": 0.8165337200870196, "grad_norm": 0.4116565403445696, "learning_rate": 8.992787348473575e-07, "loss": 0.512, "step": 1689 }, { "epoch": 0.8170171621948271, "grad_norm": 0.3855753519601646, "learning_rate": 8.947102280625708e-07, "loss": 0.4919, "step": 1690 }, { "epoch": 0.8175006043026347, "grad_norm": 0.3998193393341577, "learning_rate": 8.901522147293107e-07, "loss": 0.5063, "step": 1691 }, { "epoch": 0.8179840464104423, "grad_norm": 0.40465428030335077, "learning_rate": 8.856047064982276e-07, "loss": 0.4969, "step": 1692 }, { "epoch": 0.8184674885182499, "grad_norm": 0.3993077607842942, "learning_rate": 8.810677149931168e-07, "loss": 0.5123, "step": 1693 }, { "epoch": 0.8189509306260575, "grad_norm": 0.41845032917424874, "learning_rate": 8.765412518108957e-07, "loss": 0.5222, "step": 1694 }, { "epoch": 0.8194343727338651, "grad_norm": 0.4482989172909152, "learning_rate": 8.720253285215685e-07, "loss": 0.5245, "step": 1695 }, { "epoch": 0.8199178148416727, "grad_norm": 0.4096945568958353, "learning_rate": 8.675199566682002e-07, "loss": 0.4987, "step": 1696 }, { "epoch": 0.8204012569494803, "grad_norm": 0.42715377043083036, "learning_rate": 8.630251477668828e-07, "loss": 0.4956, "step": 1697 }, { "epoch": 0.8208846990572879, "grad_norm": 0.42586545844645524, "learning_rate": 8.585409133067119e-07, "loss": 0.5096, "step": 1698 }, { "epoch": 0.8213681411650955, "grad_norm": 0.43766586659276707, "learning_rate": 8.540672647497483e-07, "loss": 0.5136, "step": 1699 }, { "epoch": 0.8218515832729031, "grad_norm": 0.4371618341766256, "learning_rate": 8.49604213531004e-07, "loss": 0.5213, "step": 1700 }, { "epoch": 0.8223350253807107, "grad_norm": 0.4375571316772861, "learning_rate": 8.451517710583934e-07, "loss": 0.5051, "step": 1701 }, { "epoch": 0.8228184674885183, "grad_norm": 0.4132441919616583, "learning_rate": 8.407099487127207e-07, "loss": 0.5257, "step": 1702 }, { "epoch": 0.8233019095963259, "grad_norm": 0.42607745465695845, "learning_rate": 8.362787578476395e-07, "loss": 0.5249, "step": 1703 }, { "epoch": 0.8237853517041335, "grad_norm": 0.4075673839523143, "learning_rate": 8.318582097896316e-07, "loss": 0.5058, "step": 1704 }, { "epoch": 0.8242687938119411, "grad_norm": 0.42693741052199397, "learning_rate": 8.274483158379759e-07, "loss": 0.5111, "step": 1705 }, { "epoch": 0.8247522359197486, "grad_norm": 0.39832416179935565, "learning_rate": 8.230490872647146e-07, "loss": 0.4938, "step": 1706 }, { "epoch": 0.8252356780275562, "grad_norm": 0.422151557962671, "learning_rate": 8.18660535314631e-07, "loss": 0.5183, "step": 1707 }, { "epoch": 0.8257191201353637, "grad_norm": 0.4003210551929738, "learning_rate": 8.142826712052177e-07, "loss": 0.5131, "step": 1708 }, { "epoch": 0.8262025622431713, "grad_norm": 0.41552515229148246, "learning_rate": 8.099155061266495e-07, "loss": 0.5104, "step": 1709 }, { "epoch": 0.8266860043509789, "grad_norm": 0.4199192751255081, "learning_rate": 8.055590512417499e-07, "loss": 0.504, "step": 1710 }, { "epoch": 0.8271694464587865, "grad_norm": 0.4183052253157522, "learning_rate": 8.012133176859705e-07, "loss": 0.5183, "step": 1711 }, { "epoch": 0.8276528885665941, "grad_norm": 0.40771463289221466, "learning_rate": 7.968783165673554e-07, "loss": 0.5134, "step": 1712 }, { "epoch": 0.8281363306744017, "grad_norm": 0.4201027836512912, "learning_rate": 7.925540589665187e-07, "loss": 0.5074, "step": 1713 }, { "epoch": 0.8286197727822093, "grad_norm": 0.395143526726159, "learning_rate": 7.882405559366091e-07, "loss": 0.4907, "step": 1714 }, { "epoch": 0.8291032148900169, "grad_norm": 0.39924930985003787, "learning_rate": 7.839378185032897e-07, "loss": 0.5107, "step": 1715 }, { "epoch": 0.8295866569978245, "grad_norm": 0.4132095601626946, "learning_rate": 7.796458576647015e-07, "loss": 0.5185, "step": 1716 }, { "epoch": 0.8300700991056321, "grad_norm": 0.40587201306044, "learning_rate": 7.753646843914465e-07, "loss": 0.5182, "step": 1717 }, { "epoch": 0.8305535412134397, "grad_norm": 0.4094094956774689, "learning_rate": 7.710943096265461e-07, "loss": 0.5029, "step": 1718 }, { "epoch": 0.8310369833212473, "grad_norm": 0.41067812349491495, "learning_rate": 7.668347442854218e-07, "loss": 0.5021, "step": 1719 }, { "epoch": 0.8315204254290549, "grad_norm": 0.39956787890532264, "learning_rate": 7.625859992558665e-07, "loss": 0.5206, "step": 1720 }, { "epoch": 0.8320038675368625, "grad_norm": 0.43928086956712875, "learning_rate": 7.583480853980158e-07, "loss": 0.5134, "step": 1721 }, { "epoch": 0.8324873096446701, "grad_norm": 0.4100632271699525, "learning_rate": 7.541210135443188e-07, "loss": 0.5184, "step": 1722 }, { "epoch": 0.8329707517524776, "grad_norm": 0.3961555211112688, "learning_rate": 7.499047944995108e-07, "loss": 0.5222, "step": 1723 }, { "epoch": 0.8334541938602852, "grad_norm": 0.41401758140390904, "learning_rate": 7.45699439040588e-07, "loss": 0.5149, "step": 1724 }, { "epoch": 0.8339376359680928, "grad_norm": 0.41725576477900833, "learning_rate": 7.415049579167783e-07, "loss": 0.5086, "step": 1725 }, { "epoch": 0.8344210780759004, "grad_norm": 0.40808361223845036, "learning_rate": 7.37321361849514e-07, "loss": 0.5171, "step": 1726 }, { "epoch": 0.834904520183708, "grad_norm": 0.4044441513281848, "learning_rate": 7.331486615324024e-07, "loss": 0.4931, "step": 1727 }, { "epoch": 0.8353879622915156, "grad_norm": 0.39255016483428246, "learning_rate": 7.289868676312023e-07, "loss": 0.4895, "step": 1728 }, { "epoch": 0.8358714043993232, "grad_norm": 0.4273894357037594, "learning_rate": 7.248359907837959e-07, "loss": 0.5141, "step": 1729 }, { "epoch": 0.8363548465071308, "grad_norm": 0.41270523260835523, "learning_rate": 7.206960416001563e-07, "loss": 0.5053, "step": 1730 }, { "epoch": 0.8368382886149384, "grad_norm": 0.42210989792552517, "learning_rate": 7.165670306623296e-07, "loss": 0.515, "step": 1731 }, { "epoch": 0.837321730722746, "grad_norm": 0.4005116526979819, "learning_rate": 7.124489685243985e-07, "loss": 0.5084, "step": 1732 }, { "epoch": 0.8378051728305536, "grad_norm": 0.42730888005294004, "learning_rate": 7.08341865712463e-07, "loss": 0.5149, "step": 1733 }, { "epoch": 0.8382886149383612, "grad_norm": 0.3946117211995092, "learning_rate": 7.042457327246088e-07, "loss": 0.5272, "step": 1734 }, { "epoch": 0.8387720570461688, "grad_norm": 0.40058125990145727, "learning_rate": 7.001605800308825e-07, "loss": 0.5173, "step": 1735 }, { "epoch": 0.8392554991539763, "grad_norm": 0.39419621537510763, "learning_rate": 6.960864180732618e-07, "loss": 0.5182, "step": 1736 }, { "epoch": 0.8397389412617839, "grad_norm": 0.4302451888948554, "learning_rate": 6.920232572656349e-07, "loss": 0.5145, "step": 1737 }, { "epoch": 0.8402223833695915, "grad_norm": 0.39221396906385003, "learning_rate": 6.879711079937667e-07, "loss": 0.5079, "step": 1738 }, { "epoch": 0.840705825477399, "grad_norm": 0.4210023704512398, "learning_rate": 6.839299806152799e-07, "loss": 0.5061, "step": 1739 }, { "epoch": 0.8411892675852066, "grad_norm": 0.4031707044630559, "learning_rate": 6.79899885459619e-07, "loss": 0.5174, "step": 1740 }, { "epoch": 0.8416727096930142, "grad_norm": 0.40104705743190977, "learning_rate": 6.758808328280325e-07, "loss": 0.4981, "step": 1741 }, { "epoch": 0.8421561518008218, "grad_norm": 0.4158859718137932, "learning_rate": 6.718728329935448e-07, "loss": 0.5216, "step": 1742 }, { "epoch": 0.8426395939086294, "grad_norm": 0.4140963838597211, "learning_rate": 6.678758962009241e-07, "loss": 0.5154, "step": 1743 }, { "epoch": 0.843123036016437, "grad_norm": 0.41926365963573253, "learning_rate": 6.638900326666653e-07, "loss": 0.5181, "step": 1744 }, { "epoch": 0.8436064781242446, "grad_norm": 0.4007033614343704, "learning_rate": 6.599152525789531e-07, "loss": 0.4772, "step": 1745 }, { "epoch": 0.8440899202320522, "grad_norm": 0.4266694328755557, "learning_rate": 6.559515660976506e-07, "loss": 0.5153, "step": 1746 }, { "epoch": 0.8445733623398598, "grad_norm": 0.42158713984389296, "learning_rate": 6.519989833542567e-07, "loss": 0.5218, "step": 1747 }, { "epoch": 0.8450568044476674, "grad_norm": 0.4190422236566301, "learning_rate": 6.480575144518931e-07, "loss": 0.5267, "step": 1748 }, { "epoch": 0.845540246555475, "grad_norm": 0.42322451653416415, "learning_rate": 6.441271694652701e-07, "loss": 0.517, "step": 1749 }, { "epoch": 0.8460236886632826, "grad_norm": 0.4050974433698499, "learning_rate": 6.402079584406673e-07, "loss": 0.523, "step": 1750 }, { "epoch": 0.8465071307710902, "grad_norm": 0.39927068510798064, "learning_rate": 6.36299891395904e-07, "loss": 0.4943, "step": 1751 }, { "epoch": 0.8469905728788978, "grad_norm": 0.40520913199613756, "learning_rate": 6.32402978320315e-07, "loss": 0.519, "step": 1752 }, { "epoch": 0.8474740149867054, "grad_norm": 0.41524557234436116, "learning_rate": 6.285172291747232e-07, "loss": 0.5087, "step": 1753 }, { "epoch": 0.847957457094513, "grad_norm": 0.39348055940589066, "learning_rate": 6.246426538914174e-07, "loss": 0.5135, "step": 1754 }, { "epoch": 0.8484408992023206, "grad_norm": 0.40472211918575973, "learning_rate": 6.207792623741249e-07, "loss": 0.5181, "step": 1755 }, { "epoch": 0.8489243413101281, "grad_norm": 0.3797781522780497, "learning_rate": 6.169270644979836e-07, "loss": 0.4718, "step": 1756 }, { "epoch": 0.8494077834179357, "grad_norm": 0.4172564454240539, "learning_rate": 6.130860701095226e-07, "loss": 0.5093, "step": 1757 }, { "epoch": 0.8498912255257433, "grad_norm": 0.460481903524328, "learning_rate": 6.092562890266341e-07, "loss": 0.5245, "step": 1758 }, { "epoch": 0.8503746676335509, "grad_norm": 0.40544203017797725, "learning_rate": 6.054377310385479e-07, "loss": 0.5067, "step": 1759 }, { "epoch": 0.8508581097413584, "grad_norm": 0.4225253280006634, "learning_rate": 6.016304059058031e-07, "loss": 0.5169, "step": 1760 }, { "epoch": 0.851341551849166, "grad_norm": 0.40769266639259943, "learning_rate": 5.97834323360233e-07, "loss": 0.5243, "step": 1761 }, { "epoch": 0.8518249939569736, "grad_norm": 0.42284940262412657, "learning_rate": 5.940494931049262e-07, "loss": 0.5194, "step": 1762 }, { "epoch": 0.8523084360647812, "grad_norm": 0.3916025337851957, "learning_rate": 5.902759248142187e-07, "loss": 0.4975, "step": 1763 }, { "epoch": 0.8527918781725888, "grad_norm": 0.41326270414280697, "learning_rate": 5.86513628133652e-07, "loss": 0.5154, "step": 1764 }, { "epoch": 0.8532753202803964, "grad_norm": 0.40856588365868324, "learning_rate": 5.827626126799613e-07, "loss": 0.5154, "step": 1765 }, { "epoch": 0.853758762388204, "grad_norm": 0.42831173680710594, "learning_rate": 5.790228880410426e-07, "loss": 0.5163, "step": 1766 }, { "epoch": 0.8542422044960116, "grad_norm": 0.4218590594382107, "learning_rate": 5.75294463775935e-07, "loss": 0.517, "step": 1767 }, { "epoch": 0.8547256466038192, "grad_norm": 0.38253864809006055, "learning_rate": 5.715773494147919e-07, "loss": 0.4929, "step": 1768 }, { "epoch": 0.8552090887116268, "grad_norm": 0.40270548702028475, "learning_rate": 5.678715544588547e-07, "loss": 0.5088, "step": 1769 }, { "epoch": 0.8556925308194344, "grad_norm": 0.4229953125269584, "learning_rate": 5.641770883804365e-07, "loss": 0.5258, "step": 1770 }, { "epoch": 0.856175972927242, "grad_norm": 0.4037677845049078, "learning_rate": 5.604939606228887e-07, "loss": 0.5095, "step": 1771 }, { "epoch": 0.8566594150350495, "grad_norm": 0.39977977942883575, "learning_rate": 5.568221806005847e-07, "loss": 0.5128, "step": 1772 }, { "epoch": 0.8571428571428571, "grad_norm": 0.4175904938844971, "learning_rate": 5.531617576988879e-07, "loss": 0.5114, "step": 1773 }, { "epoch": 0.8576262992506647, "grad_norm": 0.41812393010867166, "learning_rate": 5.495127012741352e-07, "loss": 0.5188, "step": 1774 }, { "epoch": 0.8581097413584723, "grad_norm": 0.38871202154348194, "learning_rate": 5.45875020653609e-07, "loss": 0.4882, "step": 1775 }, { "epoch": 0.8585931834662799, "grad_norm": 0.40175664384357557, "learning_rate": 5.422487251355146e-07, "loss": 0.5088, "step": 1776 }, { "epoch": 0.8590766255740875, "grad_norm": 0.4258611448475652, "learning_rate": 5.386338239889549e-07, "loss": 0.5136, "step": 1777 }, { "epoch": 0.8595600676818951, "grad_norm": 0.38040545155326977, "learning_rate": 5.350303264539091e-07, "loss": 0.4692, "step": 1778 }, { "epoch": 0.8600435097897027, "grad_norm": 0.4185751036827134, "learning_rate": 5.314382417412062e-07, "loss": 0.516, "step": 1779 }, { "epoch": 0.8605269518975103, "grad_norm": 0.4237092619379993, "learning_rate": 5.278575790325052e-07, "loss": 0.5146, "step": 1780 }, { "epoch": 0.8610103940053179, "grad_norm": 0.4173802982789206, "learning_rate": 5.242883474802696e-07, "loss": 0.5125, "step": 1781 }, { "epoch": 0.8614938361131255, "grad_norm": 0.41838440801291993, "learning_rate": 5.207305562077403e-07, "loss": 0.5177, "step": 1782 }, { "epoch": 0.8619772782209331, "grad_norm": 0.4779855097218796, "learning_rate": 5.1718421430892e-07, "loss": 0.5304, "step": 1783 }, { "epoch": 0.8624607203287407, "grad_norm": 0.37738685143261025, "learning_rate": 5.136493308485446e-07, "loss": 0.486, "step": 1784 }, { "epoch": 0.8629441624365483, "grad_norm": 0.39963258309250466, "learning_rate": 5.101259148620618e-07, "loss": 0.4959, "step": 1785 }, { "epoch": 0.8634276045443559, "grad_norm": 0.39604391770722097, "learning_rate": 5.066139753556049e-07, "loss": 0.4993, "step": 1786 }, { "epoch": 0.8639110466521635, "grad_norm": 0.40732958269577874, "learning_rate": 5.031135213059756e-07, "loss": 0.5153, "step": 1787 }, { "epoch": 0.864394488759971, "grad_norm": 0.3897806967927546, "learning_rate": 4.99624561660616e-07, "loss": 0.4871, "step": 1788 }, { "epoch": 0.8648779308677785, "grad_norm": 0.4048723969181331, "learning_rate": 4.961471053375899e-07, "loss": 0.512, "step": 1789 }, { "epoch": 0.8653613729755861, "grad_norm": 0.4203351282800037, "learning_rate": 4.926811612255539e-07, "loss": 0.5121, "step": 1790 }, { "epoch": 0.8658448150833937, "grad_norm": 0.39858565202586066, "learning_rate": 4.892267381837396e-07, "loss": 0.5011, "step": 1791 }, { "epoch": 0.8663282571912013, "grad_norm": 0.4344627773200746, "learning_rate": 4.857838450419339e-07, "loss": 0.5103, "step": 1792 }, { "epoch": 0.8668116992990089, "grad_norm": 0.40293448022650774, "learning_rate": 4.823524906004468e-07, "loss": 0.5138, "step": 1793 }, { "epoch": 0.8672951414068165, "grad_norm": 0.41801337173969716, "learning_rate": 4.789326836300983e-07, "loss": 0.5151, "step": 1794 }, { "epoch": 0.8677785835146241, "grad_norm": 0.4058943681689954, "learning_rate": 4.7552443287218866e-07, "loss": 0.5098, "step": 1795 }, { "epoch": 0.8682620256224317, "grad_norm": 0.42652856984845416, "learning_rate": 4.7212774703848273e-07, "loss": 0.508, "step": 1796 }, { "epoch": 0.8687454677302393, "grad_norm": 0.4211824745719729, "learning_rate": 4.687426348111834e-07, "loss": 0.5122, "step": 1797 }, { "epoch": 0.8692289098380469, "grad_norm": 0.4022753726796167, "learning_rate": 4.65369104842911e-07, "loss": 0.52, "step": 1798 }, { "epoch": 0.8697123519458545, "grad_norm": 0.40283890754002527, "learning_rate": 4.620071657566777e-07, "loss": 0.5072, "step": 1799 }, { "epoch": 0.8701957940536621, "grad_norm": 0.4283203699114763, "learning_rate": 4.586568261458729e-07, "loss": 0.5096, "step": 1800 }, { "epoch": 0.8706792361614697, "grad_norm": 0.38328034805235095, "learning_rate": 4.553180945742336e-07, "loss": 0.4861, "step": 1801 }, { "epoch": 0.8711626782692773, "grad_norm": 0.4122501087059972, "learning_rate": 4.5199097957582816e-07, "loss": 0.5136, "step": 1802 }, { "epoch": 0.8716461203770849, "grad_norm": 0.41264272327652995, "learning_rate": 4.486754896550288e-07, "loss": 0.5012, "step": 1803 }, { "epoch": 0.8721295624848925, "grad_norm": 0.41725003600600513, "learning_rate": 4.45371633286496e-07, "loss": 0.5185, "step": 1804 }, { "epoch": 0.8726130045927, "grad_norm": 0.4078148663174146, "learning_rate": 4.4207941891515335e-07, "loss": 0.5135, "step": 1805 }, { "epoch": 0.8730964467005076, "grad_norm": 0.4209684818924423, "learning_rate": 4.3879885495616505e-07, "loss": 0.512, "step": 1806 }, { "epoch": 0.8735798888083152, "grad_norm": 0.3832255061477332, "learning_rate": 4.3552994979491836e-07, "loss": 0.5131, "step": 1807 }, { "epoch": 0.8740633309161228, "grad_norm": 0.415646535369065, "learning_rate": 4.322727117869951e-07, "loss": 0.5156, "step": 1808 }, { "epoch": 0.8745467730239304, "grad_norm": 0.403529021224522, "learning_rate": 4.290271492581627e-07, "loss": 0.5225, "step": 1809 }, { "epoch": 0.875030215131738, "grad_norm": 0.4248226663595473, "learning_rate": 4.257932705043372e-07, "loss": 0.5276, "step": 1810 }, { "epoch": 0.8755136572395456, "grad_norm": 0.42279657022545747, "learning_rate": 4.2257108379157586e-07, "loss": 0.5224, "step": 1811 }, { "epoch": 0.8759970993473531, "grad_norm": 0.4140176038814713, "learning_rate": 4.1936059735604497e-07, "loss": 0.5161, "step": 1812 }, { "epoch": 0.8764805414551607, "grad_norm": 0.39792458444383394, "learning_rate": 4.161618194040079e-07, "loss": 0.5277, "step": 1813 }, { "epoch": 0.8769639835629683, "grad_norm": 0.39549573015495143, "learning_rate": 4.129747581117993e-07, "loss": 0.5053, "step": 1814 }, { "epoch": 0.8774474256707759, "grad_norm": 0.4160802633412061, "learning_rate": 4.0979942162580387e-07, "loss": 0.516, "step": 1815 }, { "epoch": 0.8779308677785835, "grad_norm": 0.41239251805984983, "learning_rate": 4.06635818062438e-07, "loss": 0.5278, "step": 1816 }, { "epoch": 0.8784143098863911, "grad_norm": 0.40871873580107365, "learning_rate": 4.0348395550812713e-07, "loss": 0.5294, "step": 1817 }, { "epoch": 0.8788977519941987, "grad_norm": 0.40365670038657436, "learning_rate": 4.003438420192873e-07, "loss": 0.5158, "step": 1818 }, { "epoch": 0.8793811941020063, "grad_norm": 0.4255428234546921, "learning_rate": 3.9721548562229985e-07, "loss": 0.5114, "step": 1819 }, { "epoch": 0.8798646362098139, "grad_norm": 0.41203315649756733, "learning_rate": 3.9409889431349656e-07, "loss": 0.5116, "step": 1820 }, { "epoch": 0.8803480783176215, "grad_norm": 0.4149872650348109, "learning_rate": 3.9099407605913576e-07, "loss": 0.5099, "step": 1821 }, { "epoch": 0.880831520425429, "grad_norm": 0.4258100076362105, "learning_rate": 3.879010387953841e-07, "loss": 0.5175, "step": 1822 }, { "epoch": 0.8813149625332366, "grad_norm": 0.3902355927247227, "learning_rate": 3.84819790428293e-07, "loss": 0.498, "step": 1823 }, { "epoch": 0.8817984046410442, "grad_norm": 0.40842472365457144, "learning_rate": 3.8175033883378233e-07, "loss": 0.518, "step": 1824 }, { "epoch": 0.8822818467488518, "grad_norm": 0.4221970543634826, "learning_rate": 3.7869269185761613e-07, "loss": 0.5216, "step": 1825 }, { "epoch": 0.8827652888566594, "grad_norm": 0.40616883661281006, "learning_rate": 3.7564685731538985e-07, "loss": 0.5066, "step": 1826 }, { "epoch": 0.883248730964467, "grad_norm": 0.4061562407072031, "learning_rate": 3.7261284299249967e-07, "loss": 0.517, "step": 1827 }, { "epoch": 0.8837321730722746, "grad_norm": 0.4079225433423233, "learning_rate": 3.695906566441304e-07, "loss": 0.4959, "step": 1828 }, { "epoch": 0.8842156151800822, "grad_norm": 0.38197368709112006, "learning_rate": 3.665803059952344e-07, "loss": 0.4871, "step": 1829 }, { "epoch": 0.8846990572878898, "grad_norm": 0.411849076052872, "learning_rate": 3.63581798740511e-07, "loss": 0.5143, "step": 1830 }, { "epoch": 0.8851824993956974, "grad_norm": 0.393276210273132, "learning_rate": 3.605951425443871e-07, "loss": 0.4936, "step": 1831 }, { "epoch": 0.885665941503505, "grad_norm": 0.369604359657528, "learning_rate": 3.576203450409943e-07, "loss": 0.4684, "step": 1832 }, { "epoch": 0.8861493836113126, "grad_norm": 0.43326466002005165, "learning_rate": 3.5465741383415684e-07, "loss": 0.5104, "step": 1833 }, { "epoch": 0.8866328257191202, "grad_norm": 0.41527359664646213, "learning_rate": 3.5170635649736497e-07, "loss": 0.519, "step": 1834 }, { "epoch": 0.8871162678269278, "grad_norm": 0.41356740894281485, "learning_rate": 3.487671805737597e-07, "loss": 0.508, "step": 1835 }, { "epoch": 0.8875997099347354, "grad_norm": 0.4050751048123327, "learning_rate": 3.4583989357611037e-07, "loss": 0.5135, "step": 1836 }, { "epoch": 0.888083152042543, "grad_norm": 0.3923610722591795, "learning_rate": 3.4292450298679945e-07, "loss": 0.5075, "step": 1837 }, { "epoch": 0.8885665941503504, "grad_norm": 0.41919225013002887, "learning_rate": 3.400210162577999e-07, "loss": 0.5166, "step": 1838 }, { "epoch": 0.889050036258158, "grad_norm": 0.42118222715491443, "learning_rate": 3.371294408106585e-07, "loss": 0.523, "step": 1839 }, { "epoch": 0.8895334783659656, "grad_norm": 0.3952238335142466, "learning_rate": 3.3424978403647443e-07, "loss": 0.5138, "step": 1840 }, { "epoch": 0.8900169204737732, "grad_norm": 0.4163195177412695, "learning_rate": 3.313820532958817e-07, "loss": 0.5274, "step": 1841 }, { "epoch": 0.8905003625815808, "grad_norm": 0.3930314520659748, "learning_rate": 3.285262559190322e-07, "loss": 0.4991, "step": 1842 }, { "epoch": 0.8909838046893884, "grad_norm": 0.4336804309313973, "learning_rate": 3.256823992055741e-07, "loss": 0.5009, "step": 1843 }, { "epoch": 0.891467246797196, "grad_norm": 0.41714068524986875, "learning_rate": 3.228504904246349e-07, "loss": 0.5238, "step": 1844 }, { "epoch": 0.8919506889050036, "grad_norm": 0.41848606366751967, "learning_rate": 3.20030536814801e-07, "loss": 0.5202, "step": 1845 }, { "epoch": 0.8924341310128112, "grad_norm": 0.422964314144621, "learning_rate": 3.1722254558410047e-07, "loss": 0.5104, "step": 1846 }, { "epoch": 0.8929175731206188, "grad_norm": 0.41539348703446205, "learning_rate": 3.144265239099864e-07, "loss": 0.5152, "step": 1847 }, { "epoch": 0.8934010152284264, "grad_norm": 0.3936271006898258, "learning_rate": 3.1164247893931575e-07, "loss": 0.5071, "step": 1848 }, { "epoch": 0.893884457336234, "grad_norm": 0.4152031331913687, "learning_rate": 3.088704177883306e-07, "loss": 0.5181, "step": 1849 }, { "epoch": 0.8943678994440416, "grad_norm": 0.4176432021270733, "learning_rate": 3.06110347542643e-07, "loss": 0.5235, "step": 1850 }, { "epoch": 0.8948513415518492, "grad_norm": 0.3954219378639727, "learning_rate": 3.033622752572157e-07, "loss": 0.5019, "step": 1851 }, { "epoch": 0.8953347836596568, "grad_norm": 0.45830856560980365, "learning_rate": 3.0062620795634214e-07, "loss": 0.5263, "step": 1852 }, { "epoch": 0.8958182257674644, "grad_norm": 0.4009466020951186, "learning_rate": 2.9790215263363174e-07, "loss": 0.5222, "step": 1853 }, { "epoch": 0.896301667875272, "grad_norm": 0.3933495297633584, "learning_rate": 2.951901162519877e-07, "loss": 0.5233, "step": 1854 }, { "epoch": 0.8967851099830795, "grad_norm": 0.39895160904445, "learning_rate": 2.9249010574359636e-07, "loss": 0.5212, "step": 1855 }, { "epoch": 0.8972685520908871, "grad_norm": 0.42068899596041226, "learning_rate": 2.898021280098995e-07, "loss": 0.5168, "step": 1856 }, { "epoch": 0.8977519941986947, "grad_norm": 0.39971963228555085, "learning_rate": 2.8712618992158656e-07, "loss": 0.5084, "step": 1857 }, { "epoch": 0.8982354363065023, "grad_norm": 0.3999616227972635, "learning_rate": 2.8446229831856964e-07, "loss": 0.5088, "step": 1858 }, { "epoch": 0.8987188784143099, "grad_norm": 0.4001447692276326, "learning_rate": 2.8181046000997136e-07, "loss": 0.521, "step": 1859 }, { "epoch": 0.8992023205221175, "grad_norm": 0.41592034251039167, "learning_rate": 2.791706817741041e-07, "loss": 0.5072, "step": 1860 }, { "epoch": 0.8996857626299251, "grad_norm": 0.4445686187455443, "learning_rate": 2.765429703584538e-07, "loss": 0.5148, "step": 1861 }, { "epoch": 0.9001692047377327, "grad_norm": 0.40228802491920107, "learning_rate": 2.739273324796621e-07, "loss": 0.5262, "step": 1862 }, { "epoch": 0.9006526468455403, "grad_norm": 0.40404504261863744, "learning_rate": 2.7132377482351037e-07, "loss": 0.5147, "step": 1863 }, { "epoch": 0.9011360889533478, "grad_norm": 0.3986359660989621, "learning_rate": 2.687323040449025e-07, "loss": 0.5172, "step": 1864 }, { "epoch": 0.9016195310611554, "grad_norm": 0.42039178580411435, "learning_rate": 2.6615292676784533e-07, "loss": 0.5191, "step": 1865 }, { "epoch": 0.902102973168963, "grad_norm": 0.4168785648766661, "learning_rate": 2.635856495854372e-07, "loss": 0.5116, "step": 1866 }, { "epoch": 0.9025864152767706, "grad_norm": 0.4006359687639295, "learning_rate": 2.6103047905984224e-07, "loss": 0.5243, "step": 1867 }, { "epoch": 0.9030698573845782, "grad_norm": 0.4136741219117099, "learning_rate": 2.584874217222855e-07, "loss": 0.516, "step": 1868 }, { "epoch": 0.9035532994923858, "grad_norm": 0.41454758895188654, "learning_rate": 2.5595648407302496e-07, "loss": 0.5299, "step": 1869 }, { "epoch": 0.9040367416001934, "grad_norm": 0.43072596167116733, "learning_rate": 2.53437672581342e-07, "loss": 0.5192, "step": 1870 }, { "epoch": 0.9045201837080009, "grad_norm": 0.413346134850188, "learning_rate": 2.5093099368551974e-07, "loss": 0.5135, "step": 1871 }, { "epoch": 0.9050036258158085, "grad_norm": 0.44414111234791465, "learning_rate": 2.484364537928341e-07, "loss": 0.5248, "step": 1872 }, { "epoch": 0.9054870679236161, "grad_norm": 0.41031454686253116, "learning_rate": 2.45954059279529e-07, "loss": 0.5198, "step": 1873 }, { "epoch": 0.9059705100314237, "grad_norm": 0.3982976345229948, "learning_rate": 2.4348381649080486e-07, "loss": 0.5163, "step": 1874 }, { "epoch": 0.9064539521392313, "grad_norm": 0.4007617837820295, "learning_rate": 2.41025731740801e-07, "loss": 0.511, "step": 1875 }, { "epoch": 0.9069373942470389, "grad_norm": 0.40168617787804406, "learning_rate": 2.3857981131258037e-07, "loss": 0.5114, "step": 1876 }, { "epoch": 0.9074208363548465, "grad_norm": 0.38110421429609603, "learning_rate": 2.3614606145811347e-07, "loss": 0.4992, "step": 1877 }, { "epoch": 0.9079042784626541, "grad_norm": 0.3870732423514054, "learning_rate": 2.3372448839825978e-07, "loss": 0.4887, "step": 1878 }, { "epoch": 0.9083877205704617, "grad_norm": 0.39979584331802676, "learning_rate": 2.3131509832275633e-07, "loss": 0.5122, "step": 1879 }, { "epoch": 0.9088711626782693, "grad_norm": 0.3996732608438804, "learning_rate": 2.2891789739019733e-07, "loss": 0.5102, "step": 1880 }, { "epoch": 0.9093546047860769, "grad_norm": 0.40968516048558534, "learning_rate": 2.2653289172802295e-07, "loss": 0.5049, "step": 1881 }, { "epoch": 0.9098380468938845, "grad_norm": 0.4006751726323446, "learning_rate": 2.241600874324984e-07, "loss": 0.5144, "step": 1882 }, { "epoch": 0.9103214890016921, "grad_norm": 0.4066456668668, "learning_rate": 2.2179949056870432e-07, "loss": 0.5184, "step": 1883 }, { "epoch": 0.9108049311094997, "grad_norm": 0.4179374057794063, "learning_rate": 2.194511071705141e-07, "loss": 0.5131, "step": 1884 }, { "epoch": 0.9112883732173073, "grad_norm": 0.419480536858942, "learning_rate": 2.1711494324058724e-07, "loss": 0.5147, "step": 1885 }, { "epoch": 0.9117718153251149, "grad_norm": 0.40624640146953556, "learning_rate": 2.1479100475034598e-07, "loss": 0.5084, "step": 1886 }, { "epoch": 0.9122552574329225, "grad_norm": 0.40367583928635464, "learning_rate": 2.1247929763996534e-07, "loss": 0.4832, "step": 1887 }, { "epoch": 0.91273869954073, "grad_norm": 0.3989060344990105, "learning_rate": 2.101798278183542e-07, "loss": 0.5144, "step": 1888 }, { "epoch": 0.9132221416485375, "grad_norm": 0.3998308893808953, "learning_rate": 2.0789260116314215e-07, "loss": 0.5081, "step": 1889 }, { "epoch": 0.9137055837563451, "grad_norm": 0.4063990008087812, "learning_rate": 2.0561762352066638e-07, "loss": 0.5109, "step": 1890 }, { "epoch": 0.9141890258641527, "grad_norm": 0.4167108480628528, "learning_rate": 2.0335490070595208e-07, "loss": 0.5186, "step": 1891 }, { "epoch": 0.9146724679719603, "grad_norm": 0.39430080435851855, "learning_rate": 2.011044385027011e-07, "loss": 0.5101, "step": 1892 }, { "epoch": 0.9151559100797679, "grad_norm": 0.42096559238441866, "learning_rate": 1.988662426632765e-07, "loss": 0.5078, "step": 1893 }, { "epoch": 0.9156393521875755, "grad_norm": 0.39723951707790667, "learning_rate": 1.9664031890868795e-07, "loss": 0.5223, "step": 1894 }, { "epoch": 0.9161227942953831, "grad_norm": 0.3912147208179025, "learning_rate": 1.9442667292857432e-07, "loss": 0.509, "step": 1895 }, { "epoch": 0.9166062364031907, "grad_norm": 0.4054442997347736, "learning_rate": 1.922253103811944e-07, "loss": 0.4972, "step": 1896 }, { "epoch": 0.9170896785109983, "grad_norm": 0.4117401816100168, "learning_rate": 1.9003623689340777e-07, "loss": 0.5143, "step": 1897 }, { "epoch": 0.9175731206188059, "grad_norm": 0.40528953423093284, "learning_rate": 1.8785945806066297e-07, "loss": 0.5186, "step": 1898 }, { "epoch": 0.9180565627266135, "grad_norm": 0.4027696401480633, "learning_rate": 1.85694979446982e-07, "loss": 0.5167, "step": 1899 }, { "epoch": 0.9185400048344211, "grad_norm": 0.38938110778215645, "learning_rate": 1.835428065849465e-07, "loss": 0.5141, "step": 1900 }, { "epoch": 0.9190234469422287, "grad_norm": 0.3958049685314876, "learning_rate": 1.814029449756849e-07, "loss": 0.5231, "step": 1901 }, { "epoch": 0.9195068890500363, "grad_norm": 0.4039199277502588, "learning_rate": 1.7927540008885414e-07, "loss": 0.5088, "step": 1902 }, { "epoch": 0.9199903311578439, "grad_norm": 0.40426884197944674, "learning_rate": 1.7716017736263192e-07, "loss": 0.5129, "step": 1903 }, { "epoch": 0.9204737732656514, "grad_norm": 0.41358470698939953, "learning_rate": 1.7505728220369667e-07, "loss": 0.5203, "step": 1904 }, { "epoch": 0.920957215373459, "grad_norm": 0.4250820090378729, "learning_rate": 1.729667199872187e-07, "loss": 0.5223, "step": 1905 }, { "epoch": 0.9214406574812666, "grad_norm": 0.40899977989644076, "learning_rate": 1.70888496056843e-07, "loss": 0.5107, "step": 1906 }, { "epoch": 0.9219240995890742, "grad_norm": 0.4187760713922149, "learning_rate": 1.6882261572467862e-07, "loss": 0.5142, "step": 1907 }, { "epoch": 0.9224075416968818, "grad_norm": 0.39684261118945696, "learning_rate": 1.6676908427128103e-07, "loss": 0.4847, "step": 1908 }, { "epoch": 0.9228909838046894, "grad_norm": 0.4124141033869449, "learning_rate": 1.64727906945642e-07, "loss": 0.5063, "step": 1909 }, { "epoch": 0.923374425912497, "grad_norm": 0.4104731721152495, "learning_rate": 1.6269908896517638e-07, "loss": 0.5035, "step": 1910 }, { "epoch": 0.9238578680203046, "grad_norm": 0.38208183163995635, "learning_rate": 1.6068263551570596e-07, "loss": 0.4855, "step": 1911 }, { "epoch": 0.9243413101281122, "grad_norm": 0.37943822460943005, "learning_rate": 1.5867855175144885e-07, "loss": 0.4863, "step": 1912 }, { "epoch": 0.9248247522359198, "grad_norm": 0.4169103989292416, "learning_rate": 1.5668684279500245e-07, "loss": 0.5077, "step": 1913 }, { "epoch": 0.9253081943437274, "grad_norm": 0.41157707540822663, "learning_rate": 1.5470751373733773e-07, "loss": 0.5184, "step": 1914 }, { "epoch": 0.925791636451535, "grad_norm": 0.39771451862665147, "learning_rate": 1.5274056963777817e-07, "loss": 0.5094, "step": 1915 }, { "epoch": 0.9262750785593425, "grad_norm": 0.4092987974762817, "learning_rate": 1.507860155239921e-07, "loss": 0.5154, "step": 1916 }, { "epoch": 0.9267585206671501, "grad_norm": 0.3854503813446518, "learning_rate": 1.488438563919764e-07, "loss": 0.4938, "step": 1917 }, { "epoch": 0.9272419627749577, "grad_norm": 0.3900052964813903, "learning_rate": 1.4691409720604732e-07, "loss": 0.5077, "step": 1918 }, { "epoch": 0.9277254048827653, "grad_norm": 0.40750712678387396, "learning_rate": 1.449967428988247e-07, "loss": 0.5145, "step": 1919 }, { "epoch": 0.9282088469905729, "grad_norm": 0.4023813113333878, "learning_rate": 1.4309179837122045e-07, "loss": 0.5291, "step": 1920 }, { "epoch": 0.9286922890983804, "grad_norm": 0.38502235475455626, "learning_rate": 1.411992684924257e-07, "loss": 0.5119, "step": 1921 }, { "epoch": 0.929175731206188, "grad_norm": 0.40862887218787325, "learning_rate": 1.3931915809990039e-07, "loss": 0.5106, "step": 1922 }, { "epoch": 0.9296591733139956, "grad_norm": 0.4123756674563694, "learning_rate": 1.374514719993575e-07, "loss": 0.5126, "step": 1923 }, { "epoch": 0.9301426154218032, "grad_norm": 0.41456641529199556, "learning_rate": 1.3559621496475438e-07, "loss": 0.5145, "step": 1924 }, { "epoch": 0.9306260575296108, "grad_norm": 0.4049152537963314, "learning_rate": 1.3375339173827551e-07, "loss": 0.5261, "step": 1925 }, { "epoch": 0.9311094996374184, "grad_norm": 0.37450439680837744, "learning_rate": 1.3192300703032733e-07, "loss": 0.474, "step": 1926 }, { "epoch": 0.931592941745226, "grad_norm": 0.41100475742292075, "learning_rate": 1.3010506551952018e-07, "loss": 0.5134, "step": 1927 }, { "epoch": 0.9320763838530336, "grad_norm": 0.41369315234307685, "learning_rate": 1.2829957185265863e-07, "loss": 0.52, "step": 1928 }, { "epoch": 0.9325598259608412, "grad_norm": 0.3885589982730842, "learning_rate": 1.2650653064473106e-07, "loss": 0.5031, "step": 1929 }, { "epoch": 0.9330432680686488, "grad_norm": 0.3951920703691663, "learning_rate": 1.2472594647889357e-07, "loss": 0.5092, "step": 1930 }, { "epoch": 0.9335267101764564, "grad_norm": 0.40947647060207415, "learning_rate": 1.2295782390646494e-07, "loss": 0.5177, "step": 1931 }, { "epoch": 0.934010152284264, "grad_norm": 0.390574491653679, "learning_rate": 1.2120216744690716e-07, "loss": 0.5133, "step": 1932 }, { "epoch": 0.9344935943920716, "grad_norm": 0.4045498383765011, "learning_rate": 1.194589815878211e-07, "loss": 0.5163, "step": 1933 }, { "epoch": 0.9349770364998792, "grad_norm": 0.40440549648310886, "learning_rate": 1.177282707849281e-07, "loss": 0.5181, "step": 1934 }, { "epoch": 0.9354604786076868, "grad_norm": 0.4024689599876574, "learning_rate": 1.1601003946206723e-07, "loss": 0.5181, "step": 1935 }, { "epoch": 0.9359439207154944, "grad_norm": 0.3986512567562451, "learning_rate": 1.1430429201117476e-07, "loss": 0.5032, "step": 1936 }, { "epoch": 0.9364273628233019, "grad_norm": 0.39397430112101045, "learning_rate": 1.1261103279227858e-07, "loss": 0.5178, "step": 1937 }, { "epoch": 0.9369108049311095, "grad_norm": 0.4291769455926264, "learning_rate": 1.1093026613348601e-07, "loss": 0.5196, "step": 1938 }, { "epoch": 0.937394247038917, "grad_norm": 0.3917679927009391, "learning_rate": 1.0926199633097156e-07, "loss": 0.4919, "step": 1939 }, { "epoch": 0.9378776891467246, "grad_norm": 0.42599062790587783, "learning_rate": 1.0760622764896866e-07, "loss": 0.5147, "step": 1940 }, { "epoch": 0.9383611312545322, "grad_norm": 0.4023777838627757, "learning_rate": 1.0596296431975406e-07, "loss": 0.5156, "step": 1941 }, { "epoch": 0.9388445733623398, "grad_norm": 0.3966354448847634, "learning_rate": 1.0433221054364174e-07, "loss": 0.5065, "step": 1942 }, { "epoch": 0.9393280154701474, "grad_norm": 0.4013413460541232, "learning_rate": 1.0271397048897014e-07, "loss": 0.5053, "step": 1943 }, { "epoch": 0.939811457577955, "grad_norm": 0.37653088174864213, "learning_rate": 1.0110824829209164e-07, "loss": 0.4939, "step": 1944 }, { "epoch": 0.9402948996857626, "grad_norm": 0.399035469753345, "learning_rate": 9.951504805735979e-08, "loss": 0.5106, "step": 1945 }, { "epoch": 0.9407783417935702, "grad_norm": 0.3991989592342914, "learning_rate": 9.793437385712479e-08, "loss": 0.5153, "step": 1946 }, { "epoch": 0.9412617839013778, "grad_norm": 0.4057585743893453, "learning_rate": 9.636622973171583e-08, "loss": 0.51, "step": 1947 }, { "epoch": 0.9417452260091854, "grad_norm": 0.4054528739627977, "learning_rate": 9.481061968943717e-08, "loss": 0.516, "step": 1948 }, { "epoch": 0.942228668116993, "grad_norm": 0.3921980636127477, "learning_rate": 9.3267547706552e-08, "loss": 0.5051, "step": 1949 }, { "epoch": 0.9427121102248006, "grad_norm": 0.39913144156030567, "learning_rate": 9.17370177272775e-08, "loss": 0.5055, "step": 1950 }, { "epoch": 0.9431955523326082, "grad_norm": 0.4004586843938766, "learning_rate": 9.021903366377093e-08, "loss": 0.5164, "step": 1951 }, { "epoch": 0.9436789944404158, "grad_norm": 0.4037223050343566, "learning_rate": 8.8713599396123e-08, "loss": 0.5098, "step": 1952 }, { "epoch": 0.9441624365482234, "grad_norm": 0.39850858877215634, "learning_rate": 8.72207187723445e-08, "loss": 0.5211, "step": 1953 }, { "epoch": 0.9446458786560309, "grad_norm": 0.41059877409881057, "learning_rate": 8.5740395608358e-08, "loss": 0.5121, "step": 1954 }, { "epoch": 0.9451293207638385, "grad_norm": 0.40573184845060545, "learning_rate": 8.427263368798955e-08, "loss": 0.5256, "step": 1955 }, { "epoch": 0.9456127628716461, "grad_norm": 0.3966583772201167, "learning_rate": 8.281743676295639e-08, "loss": 0.5183, "step": 1956 }, { "epoch": 0.9460962049794537, "grad_norm": 0.40701943797191764, "learning_rate": 8.13748085528604e-08, "loss": 0.5135, "step": 1957 }, { "epoch": 0.9465796470872613, "grad_norm": 0.37606341196980025, "learning_rate": 7.99447527451741e-08, "loss": 0.4903, "step": 1958 }, { "epoch": 0.9470630891950689, "grad_norm": 0.4114856897492863, "learning_rate": 7.852727299523577e-08, "loss": 0.5068, "step": 1959 }, { "epoch": 0.9475465313028765, "grad_norm": 0.4093526523044555, "learning_rate": 7.71223729262377e-08, "loss": 0.5127, "step": 1960 }, { "epoch": 0.9480299734106841, "grad_norm": 0.4130076310229578, "learning_rate": 7.573005612921903e-08, "loss": 0.5121, "step": 1961 }, { "epoch": 0.9485134155184917, "grad_norm": 0.40254945616875554, "learning_rate": 7.435032616305238e-08, "loss": 0.5178, "step": 1962 }, { "epoch": 0.9489968576262993, "grad_norm": 0.4108181664423654, "learning_rate": 7.298318655443893e-08, "loss": 0.5078, "step": 1963 }, { "epoch": 0.9494802997341069, "grad_norm": 0.3954161759006289, "learning_rate": 7.162864079789777e-08, "loss": 0.5137, "step": 1964 }, { "epoch": 0.9499637418419145, "grad_norm": 0.3993428213266096, "learning_rate": 7.028669235575714e-08, "loss": 0.496, "step": 1965 }, { "epoch": 0.950447183949722, "grad_norm": 0.41493027982851327, "learning_rate": 6.895734465814597e-08, "loss": 0.5257, "step": 1966 }, { "epoch": 0.9509306260575296, "grad_norm": 0.38537633628397905, "learning_rate": 6.764060110298287e-08, "loss": 0.5208, "step": 1967 }, { "epoch": 0.9514140681653372, "grad_norm": 0.41057398606285567, "learning_rate": 6.633646505597113e-08, "loss": 0.5224, "step": 1968 }, { "epoch": 0.9518975102731448, "grad_norm": 0.4420797620121168, "learning_rate": 6.504493985058813e-08, "loss": 0.5108, "step": 1969 }, { "epoch": 0.9523809523809523, "grad_norm": 0.39854773939873966, "learning_rate": 6.376602878807592e-08, "loss": 0.5134, "step": 1970 }, { "epoch": 0.9528643944887599, "grad_norm": 0.4104047856111181, "learning_rate": 6.249973513743345e-08, "loss": 0.5079, "step": 1971 }, { "epoch": 0.9533478365965675, "grad_norm": 0.40077931999667527, "learning_rate": 6.124606213541052e-08, "loss": 0.5196, "step": 1972 }, { "epoch": 0.9538312787043751, "grad_norm": 0.43500257686302385, "learning_rate": 6.000501298649653e-08, "loss": 0.5197, "step": 1973 }, { "epoch": 0.9543147208121827, "grad_norm": 0.4186094433656202, "learning_rate": 5.8776590862911764e-08, "loss": 0.5135, "step": 1974 }, { "epoch": 0.9547981629199903, "grad_norm": 0.4119358911199865, "learning_rate": 5.756079890460342e-08, "loss": 0.5137, "step": 1975 }, { "epoch": 0.9552816050277979, "grad_norm": 0.39694645564275877, "learning_rate": 5.635764021923229e-08, "loss": 0.5121, "step": 1976 }, { "epoch": 0.9557650471356055, "grad_norm": 0.4154887872586203, "learning_rate": 5.5167117882171104e-08, "loss": 0.516, "step": 1977 }, { "epoch": 0.9562484892434131, "grad_norm": 0.7692472130509296, "learning_rate": 5.3989234936489556e-08, "loss": 0.5055, "step": 1978 }, { "epoch": 0.9567319313512207, "grad_norm": 0.4198618304821996, "learning_rate": 5.2823994392951497e-08, "loss": 0.5094, "step": 1979 }, { "epoch": 0.9572153734590283, "grad_norm": 0.39385026351820934, "learning_rate": 5.167139923000553e-08, "loss": 0.4933, "step": 1980 }, { "epoch": 0.9576988155668359, "grad_norm": 0.4159053427086944, "learning_rate": 5.053145239377777e-08, "loss": 0.4936, "step": 1981 }, { "epoch": 0.9581822576746435, "grad_norm": 0.3990167973444839, "learning_rate": 4.940415679806465e-08, "loss": 0.5124, "step": 1982 }, { "epoch": 0.9586656997824511, "grad_norm": 0.4012277528608715, "learning_rate": 4.828951532432457e-08, "loss": 0.5151, "step": 1983 }, { "epoch": 0.9591491418902587, "grad_norm": 0.4099731484035176, "learning_rate": 4.718753082167071e-08, "loss": 0.5191, "step": 1984 }, { "epoch": 0.9596325839980663, "grad_norm": 0.41474696363438857, "learning_rate": 4.6098206106863774e-08, "loss": 0.515, "step": 1985 }, { "epoch": 0.9601160261058739, "grad_norm": 0.4044716506352786, "learning_rate": 4.5021543964306466e-08, "loss": 0.5123, "step": 1986 }, { "epoch": 0.9605994682136814, "grad_norm": 0.40133573312591214, "learning_rate": 4.395754714603351e-08, "loss": 0.5133, "step": 1987 }, { "epoch": 0.961082910321489, "grad_norm": 0.4089192998561785, "learning_rate": 4.290621837170661e-08, "loss": 0.5236, "step": 1988 }, { "epoch": 0.9615663524292966, "grad_norm": 0.39452360352891674, "learning_rate": 4.186756032860728e-08, "loss": 0.5137, "step": 1989 }, { "epoch": 0.9620497945371042, "grad_norm": 0.39867371724056727, "learning_rate": 4.08415756716285e-08, "loss": 0.5093, "step": 1990 }, { "epoch": 0.9625332366449117, "grad_norm": 0.357065447847406, "learning_rate": 3.9828267023269696e-08, "loss": 0.4505, "step": 1991 }, { "epoch": 0.9630166787527193, "grad_norm": 0.427089982663271, "learning_rate": 3.8827636973630126e-08, "loss": 0.5101, "step": 1992 }, { "epoch": 0.9635001208605269, "grad_norm": 0.4025101063369687, "learning_rate": 3.783968808039995e-08, "loss": 0.5245, "step": 1993 }, { "epoch": 0.9639835629683345, "grad_norm": 0.4012223737061637, "learning_rate": 3.68644228688575e-08, "loss": 0.514, "step": 1994 }, { "epoch": 0.9644670050761421, "grad_norm": 0.39715847085154765, "learning_rate": 3.590184383185758e-08, "loss": 0.507, "step": 1995 }, { "epoch": 0.9649504471839497, "grad_norm": 0.4019019064729592, "learning_rate": 3.4951953429831484e-08, "loss": 0.5093, "step": 1996 }, { "epoch": 0.9654338892917573, "grad_norm": 0.3997438820964838, "learning_rate": 3.401475409077426e-08, "loss": 0.4987, "step": 1997 }, { "epoch": 0.9659173313995649, "grad_norm": 0.42247021949710184, "learning_rate": 3.309024821024354e-08, "loss": 0.5099, "step": 1998 }, { "epoch": 0.9664007735073725, "grad_norm": 0.4228197536210846, "learning_rate": 3.2178438151350685e-08, "loss": 0.5181, "step": 1999 }, { "epoch": 0.9668842156151801, "grad_norm": 0.40195549014330173, "learning_rate": 3.127932624475638e-08, "loss": 0.5118, "step": 2000 }, { "epoch": 0.9673676577229877, "grad_norm": 0.40083823310970984, "learning_rate": 3.039291478866169e-08, "loss": 0.5265, "step": 2001 }, { "epoch": 0.9678510998307953, "grad_norm": 0.4054162095867977, "learning_rate": 2.9519206048807535e-08, "loss": 0.5173, "step": 2002 }, { "epoch": 0.9683345419386028, "grad_norm": 0.4091589042260666, "learning_rate": 2.8658202258462498e-08, "loss": 0.5199, "step": 2003 }, { "epoch": 0.9688179840464104, "grad_norm": 0.37360554914951866, "learning_rate": 2.7809905618422227e-08, "loss": 0.4667, "step": 2004 }, { "epoch": 0.969301426154218, "grad_norm": 0.4264262470861418, "learning_rate": 2.6974318297001144e-08, "loss": 0.5208, "step": 2005 }, { "epoch": 0.9697848682620256, "grad_norm": 0.4133603239690626, "learning_rate": 2.615144243002743e-08, "loss": 0.5049, "step": 2006 }, { "epoch": 0.9702683103698332, "grad_norm": 0.41234915425778607, "learning_rate": 2.534128012083914e-08, "loss": 0.5215, "step": 2007 }, { "epoch": 0.9707517524776408, "grad_norm": 0.39530605693418713, "learning_rate": 2.4543833440275332e-08, "loss": 0.5096, "step": 2008 }, { "epoch": 0.9712351945854484, "grad_norm": 0.42034129099753553, "learning_rate": 2.375910442667495e-08, "loss": 0.5111, "step": 2009 }, { "epoch": 0.971718636693256, "grad_norm": 0.4128961831040994, "learning_rate": 2.298709508586794e-08, "loss": 0.5136, "step": 2010 }, { "epoch": 0.9722020788010636, "grad_norm": 0.40946352601157776, "learning_rate": 2.2227807391172474e-08, "loss": 0.5239, "step": 2011 }, { "epoch": 0.9726855209088712, "grad_norm": 0.3999251664775986, "learning_rate": 2.1481243283389408e-08, "loss": 0.514, "step": 2012 }, { "epoch": 0.9731689630166788, "grad_norm": 0.412440530125608, "learning_rate": 2.074740467079672e-08, "loss": 0.5174, "step": 2013 }, { "epoch": 0.9736524051244864, "grad_norm": 0.40416741933458183, "learning_rate": 2.002629342914453e-08, "loss": 0.5173, "step": 2014 }, { "epoch": 0.974135847232294, "grad_norm": 0.4091688587167212, "learning_rate": 1.9317911401651734e-08, "loss": 0.5035, "step": 2015 }, { "epoch": 0.9746192893401016, "grad_norm": 0.41181207482580323, "learning_rate": 1.862226039899995e-08, "loss": 0.5194, "step": 2016 }, { "epoch": 0.9751027314479092, "grad_norm": 0.38917062513507206, "learning_rate": 1.7939342199329023e-08, "loss": 0.5081, "step": 2017 }, { "epoch": 0.9755861735557168, "grad_norm": 0.4058095413062891, "learning_rate": 1.7269158548232633e-08, "loss": 0.514, "step": 2018 }, { "epoch": 0.9760696156635243, "grad_norm": 0.3909310997249257, "learning_rate": 1.661171115875493e-08, "loss": 0.5086, "step": 2019 }, { "epoch": 0.9765530577713318, "grad_norm": 0.3924463631554743, "learning_rate": 1.5967001711383877e-08, "loss": 0.5074, "step": 2020 }, { "epoch": 0.9770364998791394, "grad_norm": 0.3897349184690982, "learning_rate": 1.5335031854049055e-08, "loss": 0.5164, "step": 2021 }, { "epoch": 0.977519941986947, "grad_norm": 0.4006696563415638, "learning_rate": 1.4715803202116075e-08, "loss": 0.516, "step": 2022 }, { "epoch": 0.9780033840947546, "grad_norm": 0.39127207135897235, "learning_rate": 1.4109317338383832e-08, "loss": 0.4864, "step": 2023 }, { "epoch": 0.9784868262025622, "grad_norm": 0.41074499955315413, "learning_rate": 1.3515575813078386e-08, "loss": 0.5276, "step": 2024 }, { "epoch": 0.9789702683103698, "grad_norm": 0.3989638057067789, "learning_rate": 1.2934580143851294e-08, "loss": 0.5116, "step": 2025 }, { "epoch": 0.9794537104181774, "grad_norm": 0.4179669163774858, "learning_rate": 1.2366331815774069e-08, "loss": 0.5169, "step": 2026 }, { "epoch": 0.979937152525985, "grad_norm": 0.420952308284563, "learning_rate": 1.1810832281335394e-08, "loss": 0.5221, "step": 2027 }, { "epoch": 0.9804205946337926, "grad_norm": 0.40444089801366945, "learning_rate": 1.1268082960436688e-08, "loss": 0.526, "step": 2028 }, { "epoch": 0.9809040367416002, "grad_norm": 0.39774599938725236, "learning_rate": 1.0738085240389883e-08, "loss": 0.5158, "step": 2029 }, { "epoch": 0.9813874788494078, "grad_norm": 0.40528876629152616, "learning_rate": 1.0220840475910765e-08, "loss": 0.5148, "step": 2030 }, { "epoch": 0.9818709209572154, "grad_norm": 0.3880426443734388, "learning_rate": 9.716349989118412e-09, "loss": 0.4977, "step": 2031 }, { "epoch": 0.982354363065023, "grad_norm": 0.397796748759872, "learning_rate": 9.224615069532428e-09, "loss": 0.5183, "step": 2032 }, { "epoch": 0.9828378051728306, "grad_norm": 0.41455233095701044, "learning_rate": 8.745636974066274e-09, "loss": 0.5151, "step": 2033 }, { "epoch": 0.9833212472806382, "grad_norm": 0.39149878382311915, "learning_rate": 8.279416927026163e-09, "loss": 0.4852, "step": 2034 }, { "epoch": 0.9838046893884458, "grad_norm": 0.428044296219464, "learning_rate": 7.82595612010828e-09, "loss": 0.5088, "step": 2035 }, { "epoch": 0.9842881314962533, "grad_norm": 0.39066146033771326, "learning_rate": 7.385255712395456e-09, "loss": 0.5092, "step": 2036 }, { "epoch": 0.9847715736040609, "grad_norm": 0.3944546565780817, "learning_rate": 6.9573168303532775e-09, "loss": 0.5048, "step": 2037 }, { "epoch": 0.9852550157118685, "grad_norm": 0.3875137169857006, "learning_rate": 6.542140567827871e-09, "loss": 0.5166, "step": 2038 }, { "epoch": 0.9857384578196761, "grad_norm": 0.3942008356766705, "learning_rate": 6.1397279860431205e-09, "loss": 0.4846, "step": 2039 }, { "epoch": 0.9862218999274837, "grad_norm": 0.4122295335735729, "learning_rate": 5.750080113598455e-09, "loss": 0.5191, "step": 2040 }, { "epoch": 0.9867053420352913, "grad_norm": 0.4027922091033447, "learning_rate": 5.373197946464403e-09, "loss": 0.509, "step": 2041 }, { "epoch": 0.9871887841430989, "grad_norm": 0.41834346896126373, "learning_rate": 5.009082447983149e-09, "loss": 0.52, "step": 2042 }, { "epoch": 0.9876722262509064, "grad_norm": 0.4293813449158595, "learning_rate": 4.65773454886298e-09, "loss": 0.5131, "step": 2043 }, { "epoch": 0.988155668358714, "grad_norm": 0.3928243234499177, "learning_rate": 4.319155147176624e-09, "loss": 0.515, "step": 2044 }, { "epoch": 0.9886391104665216, "grad_norm": 0.4041253868270013, "learning_rate": 3.9933451083612464e-09, "loss": 0.5001, "step": 2045 }, { "epoch": 0.9891225525743292, "grad_norm": 0.4010404057661429, "learning_rate": 3.6803052652134572e-09, "loss": 0.5077, "step": 2046 }, { "epoch": 0.9896059946821368, "grad_norm": 0.4298866341542551, "learning_rate": 3.3800364178881996e-09, "loss": 0.5112, "step": 2047 }, { "epoch": 0.9900894367899444, "grad_norm": 0.39490801058055036, "learning_rate": 3.092539333896527e-09, "loss": 0.5087, "step": 2048 }, { "epoch": 0.990572878897752, "grad_norm": 0.4110997944280951, "learning_rate": 2.817814748104497e-09, "loss": 0.5044, "step": 2049 }, { "epoch": 0.9910563210055596, "grad_norm": 0.4386800273412446, "learning_rate": 2.555863362730393e-09, "loss": 0.5217, "step": 2050 }, { "epoch": 0.9915397631133672, "grad_norm": 0.411463570514358, "learning_rate": 2.30668584734306e-09, "loss": 0.5117, "step": 2051 }, { "epoch": 0.9920232052211748, "grad_norm": 0.40419424446848334, "learning_rate": 2.070282838859683e-09, "loss": 0.5056, "step": 2052 }, { "epoch": 0.9925066473289823, "grad_norm": 0.40703300041841234, "learning_rate": 1.8466549415463442e-09, "loss": 0.5319, "step": 2053 }, { "epoch": 0.9929900894367899, "grad_norm": 0.42530845899182246, "learning_rate": 1.635802727013025e-09, "loss": 0.5138, "step": 2054 }, { "epoch": 0.9934735315445975, "grad_norm": 0.4058607106461754, "learning_rate": 1.4377267342158274e-09, "loss": 0.4883, "step": 2055 }, { "epoch": 0.9939569736524051, "grad_norm": 0.4062782037922318, "learning_rate": 1.2524274694525329e-09, "loss": 0.5225, "step": 2056 }, { "epoch": 0.9944404157602127, "grad_norm": 4.724922277274833, "learning_rate": 1.0799054063626024e-09, "loss": 0.5232, "step": 2057 }, { "epoch": 0.9949238578680203, "grad_norm": 0.3906054598062824, "learning_rate": 9.201609859271765e-10, "loss": 0.5132, "step": 2058 }, { "epoch": 0.9954072999758279, "grad_norm": 0.3973857264750407, "learning_rate": 7.731946164657445e-10, "loss": 0.5083, "step": 2059 }, { "epoch": 0.9958907420836355, "grad_norm": 0.39787099969142303, "learning_rate": 6.390066736355893e-10, "loss": 0.5138, "step": 2060 }, { "epoch": 0.9963741841914431, "grad_norm": 0.4073985533388715, "learning_rate": 5.17597500432343e-10, "loss": 0.5134, "step": 2061 }, { "epoch": 0.9968576262992507, "grad_norm": 0.4217388952314898, "learning_rate": 4.089674071872107e-10, "loss": 0.5204, "step": 2062 }, { "epoch": 0.9973410684070583, "grad_norm": 0.3938957473860418, "learning_rate": 3.131166715680811e-10, "loss": 0.5134, "step": 2063 }, { "epoch": 0.9978245105148659, "grad_norm": 0.4133454970429142, "learning_rate": 2.3004553857675082e-10, "loss": 0.5136, "step": 2064 }, { "epoch": 0.9983079526226735, "grad_norm": 3.9528345451843885, "learning_rate": 1.5975422055003465e-10, "loss": 0.5088, "step": 2065 }, { "epoch": 0.9987913947304811, "grad_norm": 0.4124191883874225, "learning_rate": 1.022428971581002e-10, "loss": 0.5106, "step": 2066 }, { "epoch": 0.9992748368382887, "grad_norm": 0.42214447971757757, "learning_rate": 5.751171540391287e-11, "loss": 0.513, "step": 2067 }, { "epoch": 0.9997582789460963, "grad_norm": 0.4373529560530007, "learning_rate": 2.556078962490105e-11, "loss": 0.5278, "step": 2068 }, { "epoch": 1.0, "grad_norm": 0.4373529560530007, "learning_rate": 6.390201489625547e-12, "loss": 0.4723, "step": 2069 }, { "epoch": 1.0, "step": 2069, "total_flos": 2898754626256896.0, "train_loss": 0.5475362745847836, "train_runtime": 128435.5344, "train_samples_per_second": 2.061, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 2069, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 208, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2898754626256896.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }