{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.40097323600973234, "eval_steps": 500, "global_step": 4120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.732360097323601e-05, "grad_norm": 16.13226202148005, "learning_rate": 3.2362459546925574e-08, "loss": 1.1997, "step": 1 }, { "epoch": 0.00019464720194647202, "grad_norm": 15.765097511926365, "learning_rate": 6.472491909385115e-08, "loss": 1.384, "step": 2 }, { "epoch": 0.00029197080291970805, "grad_norm": 16.64113665586635, "learning_rate": 9.70873786407767e-08, "loss": 1.2291, "step": 3 }, { "epoch": 0.00038929440389294404, "grad_norm": 20.34864047521242, "learning_rate": 1.294498381877023e-07, "loss": 0.9025, "step": 4 }, { "epoch": 0.00048661800486618007, "grad_norm": 28.710711096046108, "learning_rate": 1.6181229773462782e-07, "loss": 1.0305, "step": 5 }, { "epoch": 0.0005839416058394161, "grad_norm": 21.945801992582915, "learning_rate": 1.941747572815534e-07, "loss": 1.0979, "step": 6 }, { "epoch": 0.0006812652068126521, "grad_norm": 23.947905905644966, "learning_rate": 2.26537216828479e-07, "loss": 1.1909, "step": 7 }, { "epoch": 0.0007785888077858881, "grad_norm": 19.835016686730835, "learning_rate": 2.588996763754046e-07, "loss": 1.2083, "step": 8 }, { "epoch": 0.0008759124087591241, "grad_norm": 16.926846352507788, "learning_rate": 2.9126213592233014e-07, "loss": 1.2369, "step": 9 }, { "epoch": 0.0009732360097323601, "grad_norm": 21.349924470647284, "learning_rate": 3.2362459546925565e-07, "loss": 1.0052, "step": 10 }, { "epoch": 0.0010705596107055961, "grad_norm": 25.127579741628022, "learning_rate": 3.5598705501618125e-07, "loss": 1.2631, "step": 11 }, { "epoch": 0.0011678832116788322, "grad_norm": 12.524049131196549, "learning_rate": 3.883495145631068e-07, "loss": 1.0884, "step": 12 }, { "epoch": 0.001265206812652068, "grad_norm": 20.706648432487587, "learning_rate": 4.207119741100324e-07, "loss": 1.1469, "step": 13 }, { "epoch": 0.0013625304136253042, "grad_norm": 17.655230197318655, "learning_rate": 4.53074433656958e-07, "loss": 1.2922, "step": 14 }, { "epoch": 0.00145985401459854, "grad_norm": 16.550170455008725, "learning_rate": 4.854368932038835e-07, "loss": 1.1792, "step": 15 }, { "epoch": 0.0015571776155717761, "grad_norm": 24.456798845425887, "learning_rate": 5.177993527508092e-07, "loss": 1.0804, "step": 16 }, { "epoch": 0.0016545012165450122, "grad_norm": 14.659117460865279, "learning_rate": 5.501618122977346e-07, "loss": 1.0973, "step": 17 }, { "epoch": 0.0017518248175182481, "grad_norm": 15.324823146378344, "learning_rate": 5.825242718446603e-07, "loss": 0.9791, "step": 18 }, { "epoch": 0.0018491484184914842, "grad_norm": 12.483869597287145, "learning_rate": 6.148867313915858e-07, "loss": 1.0829, "step": 19 }, { "epoch": 0.0019464720194647203, "grad_norm": 11.921211994178957, "learning_rate": 6.472491909385113e-07, "loss": 0.6862, "step": 20 }, { "epoch": 0.0020437956204379564, "grad_norm": 14.53279456676939, "learning_rate": 6.79611650485437e-07, "loss": 0.7814, "step": 21 }, { "epoch": 0.0021411192214111923, "grad_norm": 15.68359520937104, "learning_rate": 7.119741100323625e-07, "loss": 0.883, "step": 22 }, { "epoch": 0.002238442822384428, "grad_norm": 14.062468532950906, "learning_rate": 7.443365695792882e-07, "loss": 1.0087, "step": 23 }, { "epoch": 0.0023357664233576644, "grad_norm": 11.150778403716444, "learning_rate": 7.766990291262136e-07, "loss": 0.4884, "step": 24 }, { "epoch": 0.0024330900243309003, "grad_norm": 7.740982223602688, "learning_rate": 8.090614886731392e-07, "loss": 0.8543, "step": 25 }, { "epoch": 0.002530413625304136, "grad_norm": 6.4338060169141915, "learning_rate": 8.414239482200648e-07, "loss": 0.7948, "step": 26 }, { "epoch": 0.002627737226277372, "grad_norm": 6.227022582398367, "learning_rate": 8.737864077669904e-07, "loss": 0.7814, "step": 27 }, { "epoch": 0.0027250608272506084, "grad_norm": 7.989531820662516, "learning_rate": 9.06148867313916e-07, "loss": 0.5645, "step": 28 }, { "epoch": 0.0028223844282238442, "grad_norm": 6.4745089193753, "learning_rate": 9.385113268608415e-07, "loss": 0.6802, "step": 29 }, { "epoch": 0.00291970802919708, "grad_norm": 8.23650018531745, "learning_rate": 9.70873786407767e-07, "loss": 0.6218, "step": 30 }, { "epoch": 0.0030170316301703164, "grad_norm": 4.915479010119541, "learning_rate": 1.0032362459546926e-06, "loss": 0.8879, "step": 31 }, { "epoch": 0.0031143552311435523, "grad_norm": 4.288138757396447, "learning_rate": 1.0355987055016184e-06, "loss": 0.5917, "step": 32 }, { "epoch": 0.003211678832116788, "grad_norm": 4.230901102531741, "learning_rate": 1.0679611650485437e-06, "loss": 0.7373, "step": 33 }, { "epoch": 0.0033090024330900245, "grad_norm": 4.714303656539792, "learning_rate": 1.1003236245954693e-06, "loss": 0.5886, "step": 34 }, { "epoch": 0.0034063260340632603, "grad_norm": 4.1204943469600925, "learning_rate": 1.132686084142395e-06, "loss": 0.5991, "step": 35 }, { "epoch": 0.0035036496350364962, "grad_norm": 3.124375547961107, "learning_rate": 1.1650485436893206e-06, "loss": 0.432, "step": 36 }, { "epoch": 0.0036009732360097325, "grad_norm": 3.741153837090354, "learning_rate": 1.197411003236246e-06, "loss": 0.6379, "step": 37 }, { "epoch": 0.0036982968369829684, "grad_norm": 3.7740270813504506, "learning_rate": 1.2297734627831717e-06, "loss": 0.5595, "step": 38 }, { "epoch": 0.0037956204379562043, "grad_norm": 4.783986424289694, "learning_rate": 1.2621359223300972e-06, "loss": 0.8717, "step": 39 }, { "epoch": 0.0038929440389294406, "grad_norm": 4.242597978097827, "learning_rate": 1.2944983818770226e-06, "loss": 0.6632, "step": 40 }, { "epoch": 0.0039902676399026765, "grad_norm": 4.309602952976607, "learning_rate": 1.3268608414239483e-06, "loss": 0.7191, "step": 41 }, { "epoch": 0.004087591240875913, "grad_norm": 4.136462382872819, "learning_rate": 1.359223300970874e-06, "loss": 0.6782, "step": 42 }, { "epoch": 0.004184914841849148, "grad_norm": 4.2148643401229, "learning_rate": 1.3915857605177997e-06, "loss": 0.8932, "step": 43 }, { "epoch": 0.0042822384428223845, "grad_norm": 3.829331188520966, "learning_rate": 1.423948220064725e-06, "loss": 0.4697, "step": 44 }, { "epoch": 0.004379562043795621, "grad_norm": 3.4564347781684557, "learning_rate": 1.4563106796116506e-06, "loss": 0.3377, "step": 45 }, { "epoch": 0.004476885644768856, "grad_norm": 3.319649807488789, "learning_rate": 1.4886731391585763e-06, "loss": 0.4589, "step": 46 }, { "epoch": 0.0045742092457420926, "grad_norm": 3.8856546910308034, "learning_rate": 1.5210355987055017e-06, "loss": 0.8413, "step": 47 }, { "epoch": 0.004671532846715329, "grad_norm": 3.7955924171570605, "learning_rate": 1.5533980582524272e-06, "loss": 0.588, "step": 48 }, { "epoch": 0.004768856447688564, "grad_norm": 4.5762685715882805, "learning_rate": 1.585760517799353e-06, "loss": 0.6472, "step": 49 }, { "epoch": 0.004866180048661801, "grad_norm": 4.284420204063246, "learning_rate": 1.6181229773462783e-06, "loss": 0.5233, "step": 50 }, { "epoch": 0.004963503649635037, "grad_norm": 4.0399534913964645, "learning_rate": 1.650485436893204e-06, "loss": 0.6737, "step": 51 }, { "epoch": 0.005060827250608272, "grad_norm": 4.850258079033273, "learning_rate": 1.6828478964401297e-06, "loss": 0.5017, "step": 52 }, { "epoch": 0.005158150851581509, "grad_norm": 3.289730774319516, "learning_rate": 1.715210355987055e-06, "loss": 0.6378, "step": 53 }, { "epoch": 0.005255474452554744, "grad_norm": 3.116783938182044, "learning_rate": 1.7475728155339808e-06, "loss": 0.5681, "step": 54 }, { "epoch": 0.00535279805352798, "grad_norm": 3.5896487509946677, "learning_rate": 1.7799352750809063e-06, "loss": 0.5222, "step": 55 }, { "epoch": 0.005450121654501217, "grad_norm": 3.3627737905222146, "learning_rate": 1.812297734627832e-06, "loss": 0.351, "step": 56 }, { "epoch": 0.005547445255474452, "grad_norm": 3.405981770724818, "learning_rate": 1.8446601941747574e-06, "loss": 0.5832, "step": 57 }, { "epoch": 0.0056447688564476885, "grad_norm": 3.231134680455488, "learning_rate": 1.877022653721683e-06, "loss": 0.5558, "step": 58 }, { "epoch": 0.005742092457420925, "grad_norm": 4.2963387449464605, "learning_rate": 1.9093851132686085e-06, "loss": 0.7544, "step": 59 }, { "epoch": 0.00583941605839416, "grad_norm": 3.3678084152804315, "learning_rate": 1.941747572815534e-06, "loss": 0.554, "step": 60 }, { "epoch": 0.0059367396593673965, "grad_norm": 3.635756089652443, "learning_rate": 1.9741100323624596e-06, "loss": 0.5312, "step": 61 }, { "epoch": 0.006034063260340633, "grad_norm": 3.91764256649437, "learning_rate": 2.006472491909385e-06, "loss": 0.4329, "step": 62 }, { "epoch": 0.006131386861313868, "grad_norm": 3.4866607421863565, "learning_rate": 2.0388349514563107e-06, "loss": 0.4453, "step": 63 }, { "epoch": 0.006228710462287105, "grad_norm": 2.9369425143161147, "learning_rate": 2.0711974110032367e-06, "loss": 0.467, "step": 64 }, { "epoch": 0.006326034063260341, "grad_norm": 3.0906723589687024, "learning_rate": 2.103559870550162e-06, "loss": 0.3917, "step": 65 }, { "epoch": 0.006423357664233576, "grad_norm": 3.5121616512799747, "learning_rate": 2.1359223300970874e-06, "loss": 0.6428, "step": 66 }, { "epoch": 0.006520681265206813, "grad_norm": 3.470270871630247, "learning_rate": 2.1682847896440134e-06, "loss": 0.586, "step": 67 }, { "epoch": 0.006618004866180049, "grad_norm": 2.8689679430782498, "learning_rate": 2.2006472491909385e-06, "loss": 0.2938, "step": 68 }, { "epoch": 0.006715328467153284, "grad_norm": 4.115573400175418, "learning_rate": 2.2330097087378645e-06, "loss": 0.3855, "step": 69 }, { "epoch": 0.006812652068126521, "grad_norm": 3.903319335204406, "learning_rate": 2.26537216828479e-06, "loss": 0.6272, "step": 70 }, { "epoch": 0.006909975669099757, "grad_norm": 2.649165320750572, "learning_rate": 2.297734627831715e-06, "loss": 0.5229, "step": 71 }, { "epoch": 0.0070072992700729924, "grad_norm": 2.8543884488184235, "learning_rate": 2.330097087378641e-06, "loss": 0.4006, "step": 72 }, { "epoch": 0.007104622871046229, "grad_norm": 2.9817247056794134, "learning_rate": 2.3624595469255667e-06, "loss": 0.2331, "step": 73 }, { "epoch": 0.007201946472019465, "grad_norm": 3.592880940053797, "learning_rate": 2.394822006472492e-06, "loss": 0.4889, "step": 74 }, { "epoch": 0.0072992700729927005, "grad_norm": 2.89844013224274, "learning_rate": 2.427184466019418e-06, "loss": 0.4711, "step": 75 }, { "epoch": 0.007396593673965937, "grad_norm": 2.6071345596032134, "learning_rate": 2.4595469255663434e-06, "loss": 0.4844, "step": 76 }, { "epoch": 0.007493917274939173, "grad_norm": 2.9053930844585776, "learning_rate": 2.491909385113269e-06, "loss": 0.5163, "step": 77 }, { "epoch": 0.0075912408759124085, "grad_norm": 3.4016540038418115, "learning_rate": 2.5242718446601945e-06, "loss": 0.5852, "step": 78 }, { "epoch": 0.007688564476885645, "grad_norm": 2.7133170026932887, "learning_rate": 2.55663430420712e-06, "loss": 0.4934, "step": 79 }, { "epoch": 0.007785888077858881, "grad_norm": 3.2321439410345585, "learning_rate": 2.588996763754045e-06, "loss": 0.62, "step": 80 }, { "epoch": 0.007883211678832117, "grad_norm": 2.6835948161160545, "learning_rate": 2.621359223300971e-06, "loss": 0.4689, "step": 81 }, { "epoch": 0.007980535279805353, "grad_norm": 4.716894934604404, "learning_rate": 2.6537216828478967e-06, "loss": 0.3364, "step": 82 }, { "epoch": 0.00807785888077859, "grad_norm": 2.6507857723180646, "learning_rate": 2.686084142394822e-06, "loss": 0.3785, "step": 83 }, { "epoch": 0.008175182481751826, "grad_norm": 2.356714630861861, "learning_rate": 2.718446601941748e-06, "loss": 0.2591, "step": 84 }, { "epoch": 0.00827250608272506, "grad_norm": 2.755477478688418, "learning_rate": 2.7508090614886734e-06, "loss": 0.4762, "step": 85 }, { "epoch": 0.008369829683698296, "grad_norm": 3.7771581783688837, "learning_rate": 2.7831715210355993e-06, "loss": 0.4627, "step": 86 }, { "epoch": 0.008467153284671533, "grad_norm": 2.8568450908810257, "learning_rate": 2.8155339805825245e-06, "loss": 0.4322, "step": 87 }, { "epoch": 0.008564476885644769, "grad_norm": 2.914756058289183, "learning_rate": 2.84789644012945e-06, "loss": 0.4835, "step": 88 }, { "epoch": 0.008661800486618005, "grad_norm": 2.414182197047686, "learning_rate": 2.880258899676376e-06, "loss": 0.493, "step": 89 }, { "epoch": 0.008759124087591242, "grad_norm": 2.8597853736106975, "learning_rate": 2.912621359223301e-06, "loss": 0.6063, "step": 90 }, { "epoch": 0.008856447688564476, "grad_norm": 2.4567808863650007, "learning_rate": 2.9449838187702267e-06, "loss": 0.5874, "step": 91 }, { "epoch": 0.008953771289537713, "grad_norm": 2.819434031784131, "learning_rate": 2.9773462783171527e-06, "loss": 0.552, "step": 92 }, { "epoch": 0.009051094890510949, "grad_norm": 1.9840396387462764, "learning_rate": 3.0097087378640778e-06, "loss": 0.3736, "step": 93 }, { "epoch": 0.009148418491484185, "grad_norm": 2.52047300259283, "learning_rate": 3.0420711974110033e-06, "loss": 0.407, "step": 94 }, { "epoch": 0.009245742092457421, "grad_norm": 3.140839526692518, "learning_rate": 3.0744336569579293e-06, "loss": 0.6513, "step": 95 }, { "epoch": 0.009343065693430658, "grad_norm": 3.1368865731879554, "learning_rate": 3.1067961165048544e-06, "loss": 0.4804, "step": 96 }, { "epoch": 0.009440389294403892, "grad_norm": 2.6987222968513196, "learning_rate": 3.13915857605178e-06, "loss": 0.4228, "step": 97 }, { "epoch": 0.009537712895377129, "grad_norm": 2.5779408707034026, "learning_rate": 3.171521035598706e-06, "loss": 0.4654, "step": 98 }, { "epoch": 0.009635036496350365, "grad_norm": 2.5189587792888934, "learning_rate": 3.2038834951456315e-06, "loss": 0.5465, "step": 99 }, { "epoch": 0.009732360097323601, "grad_norm": 2.457408493992738, "learning_rate": 3.2362459546925567e-06, "loss": 0.5077, "step": 100 }, { "epoch": 0.009829683698296838, "grad_norm": 2.445932328031196, "learning_rate": 3.2686084142394826e-06, "loss": 0.492, "step": 101 }, { "epoch": 0.009927007299270074, "grad_norm": 2.3199141960061915, "learning_rate": 3.300970873786408e-06, "loss": 0.4432, "step": 102 }, { "epoch": 0.010024330900243308, "grad_norm": 3.88769555780582, "learning_rate": 3.3333333333333333e-06, "loss": 0.3684, "step": 103 }, { "epoch": 0.010121654501216545, "grad_norm": 2.63905676146042, "learning_rate": 3.3656957928802593e-06, "loss": 0.4238, "step": 104 }, { "epoch": 0.010218978102189781, "grad_norm": 3.0073749174392885, "learning_rate": 3.398058252427185e-06, "loss": 0.4655, "step": 105 }, { "epoch": 0.010316301703163017, "grad_norm": 2.613524831872459, "learning_rate": 3.43042071197411e-06, "loss": 0.4948, "step": 106 }, { "epoch": 0.010413625304136254, "grad_norm": 2.4293628733346764, "learning_rate": 3.462783171521036e-06, "loss": 0.3717, "step": 107 }, { "epoch": 0.010510948905109488, "grad_norm": 3.3036504610837016, "learning_rate": 3.4951456310679615e-06, "loss": 0.4939, "step": 108 }, { "epoch": 0.010608272506082725, "grad_norm": 2.6808221933664846, "learning_rate": 3.5275080906148866e-06, "loss": 0.4809, "step": 109 }, { "epoch": 0.01070559610705596, "grad_norm": 2.853958419293739, "learning_rate": 3.5598705501618126e-06, "loss": 0.4066, "step": 110 }, { "epoch": 0.010802919708029197, "grad_norm": 5.3412930378250145, "learning_rate": 3.592233009708738e-06, "loss": 0.3599, "step": 111 }, { "epoch": 0.010900243309002433, "grad_norm": 2.983669976646381, "learning_rate": 3.624595469255664e-06, "loss": 0.6187, "step": 112 }, { "epoch": 0.01099756690997567, "grad_norm": 3.388543821878077, "learning_rate": 3.6569579288025893e-06, "loss": 0.717, "step": 113 }, { "epoch": 0.011094890510948904, "grad_norm": 3.0720120062792127, "learning_rate": 3.689320388349515e-06, "loss": 0.5057, "step": 114 }, { "epoch": 0.01119221411192214, "grad_norm": 2.521868238475485, "learning_rate": 3.721682847896441e-06, "loss": 0.4308, "step": 115 }, { "epoch": 0.011289537712895377, "grad_norm": 2.641085251645149, "learning_rate": 3.754045307443366e-06, "loss": 0.4047, "step": 116 }, { "epoch": 0.011386861313868613, "grad_norm": 2.6936547530255828, "learning_rate": 3.7864077669902915e-06, "loss": 0.5548, "step": 117 }, { "epoch": 0.01148418491484185, "grad_norm": 5.599830434139348, "learning_rate": 3.818770226537217e-06, "loss": 0.5338, "step": 118 }, { "epoch": 0.011581508515815086, "grad_norm": 2.6372065340185378, "learning_rate": 3.851132686084142e-06, "loss": 0.4833, "step": 119 }, { "epoch": 0.01167883211678832, "grad_norm": 2.555049765563167, "learning_rate": 3.883495145631068e-06, "loss": 0.4295, "step": 120 }, { "epoch": 0.011776155717761557, "grad_norm": 2.22725048478721, "learning_rate": 3.915857605177994e-06, "loss": 0.4074, "step": 121 }, { "epoch": 0.011873479318734793, "grad_norm": 3.0093045583939984, "learning_rate": 3.948220064724919e-06, "loss": 0.7168, "step": 122 }, { "epoch": 0.01197080291970803, "grad_norm": 2.8800338131191223, "learning_rate": 3.980582524271845e-06, "loss": 0.3826, "step": 123 }, { "epoch": 0.012068126520681266, "grad_norm": 2.3197904571086974, "learning_rate": 4.01294498381877e-06, "loss": 0.2584, "step": 124 }, { "epoch": 0.012165450121654502, "grad_norm": 2.929540360888414, "learning_rate": 4.045307443365696e-06, "loss": 0.4617, "step": 125 }, { "epoch": 0.012262773722627737, "grad_norm": 2.5602803735383137, "learning_rate": 4.0776699029126215e-06, "loss": 0.2561, "step": 126 }, { "epoch": 0.012360097323600973, "grad_norm": 2.676345297957673, "learning_rate": 4.1100323624595475e-06, "loss": 0.2996, "step": 127 }, { "epoch": 0.01245742092457421, "grad_norm": 1.9047794610871986, "learning_rate": 4.1423948220064734e-06, "loss": 0.3475, "step": 128 }, { "epoch": 0.012554744525547445, "grad_norm": 2.9014607006450555, "learning_rate": 4.1747572815533986e-06, "loss": 0.4748, "step": 129 }, { "epoch": 0.012652068126520682, "grad_norm": 2.2992367182815987, "learning_rate": 4.207119741100324e-06, "loss": 0.3465, "step": 130 }, { "epoch": 0.012749391727493918, "grad_norm": 2.668874383033437, "learning_rate": 4.23948220064725e-06, "loss": 0.6119, "step": 131 }, { "epoch": 0.012846715328467153, "grad_norm": 2.69106703615133, "learning_rate": 4.271844660194175e-06, "loss": 0.4743, "step": 132 }, { "epoch": 0.012944038929440389, "grad_norm": 2.972314561813759, "learning_rate": 4.304207119741101e-06, "loss": 0.5766, "step": 133 }, { "epoch": 0.013041362530413625, "grad_norm": 2.7487017428059635, "learning_rate": 4.336569579288027e-06, "loss": 0.5818, "step": 134 }, { "epoch": 0.013138686131386862, "grad_norm": 3.1117207482379663, "learning_rate": 4.368932038834952e-06, "loss": 0.6468, "step": 135 }, { "epoch": 0.013236009732360098, "grad_norm": 2.781796948090657, "learning_rate": 4.401294498381877e-06, "loss": 0.7209, "step": 136 }, { "epoch": 0.013333333333333334, "grad_norm": 2.5480533986327556, "learning_rate": 4.433656957928803e-06, "loss": 0.5907, "step": 137 }, { "epoch": 0.013430656934306569, "grad_norm": 2.054397852683208, "learning_rate": 4.466019417475729e-06, "loss": 0.4079, "step": 138 }, { "epoch": 0.013527980535279805, "grad_norm": 2.2564046621809037, "learning_rate": 4.498381877022654e-06, "loss": 0.4, "step": 139 }, { "epoch": 0.013625304136253041, "grad_norm": 2.8739841159071022, "learning_rate": 4.53074433656958e-06, "loss": 0.5819, "step": 140 }, { "epoch": 0.013722627737226278, "grad_norm": 2.6418540847993657, "learning_rate": 4.563106796116505e-06, "loss": 0.589, "step": 141 }, { "epoch": 0.013819951338199514, "grad_norm": 2.431908870746442, "learning_rate": 4.59546925566343e-06, "loss": 0.5468, "step": 142 }, { "epoch": 0.013917274939172749, "grad_norm": 4.44933942542394, "learning_rate": 4.627831715210356e-06, "loss": 0.3846, "step": 143 }, { "epoch": 0.014014598540145985, "grad_norm": 2.2469929628351126, "learning_rate": 4.660194174757282e-06, "loss": 0.3047, "step": 144 }, { "epoch": 0.014111922141119221, "grad_norm": 2.8361034502388205, "learning_rate": 4.6925566343042074e-06, "loss": 0.4186, "step": 145 }, { "epoch": 0.014209245742092457, "grad_norm": 2.485184255788147, "learning_rate": 4.724919093851133e-06, "loss": 0.455, "step": 146 }, { "epoch": 0.014306569343065694, "grad_norm": 2.677307495548506, "learning_rate": 4.7572815533980585e-06, "loss": 0.6346, "step": 147 }, { "epoch": 0.01440389294403893, "grad_norm": 2.9440091029213034, "learning_rate": 4.789644012944984e-06, "loss": 0.4961, "step": 148 }, { "epoch": 0.014501216545012165, "grad_norm": 2.6810327828724723, "learning_rate": 4.82200647249191e-06, "loss": 0.3754, "step": 149 }, { "epoch": 0.014598540145985401, "grad_norm": 2.519257002697837, "learning_rate": 4.854368932038836e-06, "loss": 0.249, "step": 150 }, { "epoch": 0.014695863746958637, "grad_norm": 2.8041238457488578, "learning_rate": 4.886731391585761e-06, "loss": 0.3117, "step": 151 }, { "epoch": 0.014793187347931874, "grad_norm": 2.363481194731433, "learning_rate": 4.919093851132687e-06, "loss": 0.3325, "step": 152 }, { "epoch": 0.01489051094890511, "grad_norm": 3.078347599868747, "learning_rate": 4.951456310679612e-06, "loss": 0.3569, "step": 153 }, { "epoch": 0.014987834549878346, "grad_norm": 3.2926461094535515, "learning_rate": 4.983818770226538e-06, "loss": 0.716, "step": 154 }, { "epoch": 0.01508515815085158, "grad_norm": 2.340052421830345, "learning_rate": 5.016181229773464e-06, "loss": 0.2642, "step": 155 }, { "epoch": 0.015182481751824817, "grad_norm": 1.8915730140906823, "learning_rate": 5.048543689320389e-06, "loss": 0.3523, "step": 156 }, { "epoch": 0.015279805352798053, "grad_norm": 4.2448533254564484, "learning_rate": 5.080906148867314e-06, "loss": 0.5185, "step": 157 }, { "epoch": 0.01537712895377129, "grad_norm": 2.1172922256300333, "learning_rate": 5.11326860841424e-06, "loss": 0.3341, "step": 158 }, { "epoch": 0.015474452554744526, "grad_norm": 2.7414250631657113, "learning_rate": 5.145631067961165e-06, "loss": 0.5965, "step": 159 }, { "epoch": 0.015571776155717762, "grad_norm": 1.977804344185745, "learning_rate": 5.17799352750809e-06, "loss": 0.239, "step": 160 }, { "epoch": 0.015669099756690997, "grad_norm": 2.771807640315191, "learning_rate": 5.210355987055017e-06, "loss": 0.4122, "step": 161 }, { "epoch": 0.015766423357664233, "grad_norm": 1.9977073642008174, "learning_rate": 5.242718446601942e-06, "loss": 0.3423, "step": 162 }, { "epoch": 0.01586374695863747, "grad_norm": 3.222730527079622, "learning_rate": 5.275080906148867e-06, "loss": 0.5647, "step": 163 }, { "epoch": 0.015961070559610706, "grad_norm": 2.95441646694508, "learning_rate": 5.307443365695793e-06, "loss": 0.5198, "step": 164 }, { "epoch": 0.016058394160583942, "grad_norm": 2.3346384576429116, "learning_rate": 5.3398058252427185e-06, "loss": 0.3516, "step": 165 }, { "epoch": 0.01615571776155718, "grad_norm": 2.089159587923689, "learning_rate": 5.372168284789644e-06, "loss": 0.3704, "step": 166 }, { "epoch": 0.016253041362530415, "grad_norm": 2.8135820638465088, "learning_rate": 5.4045307443365705e-06, "loss": 0.3729, "step": 167 }, { "epoch": 0.01635036496350365, "grad_norm": 2.991259557993277, "learning_rate": 5.436893203883496e-06, "loss": 0.5622, "step": 168 }, { "epoch": 0.016447688564476887, "grad_norm": 3.1512644455187857, "learning_rate": 5.4692556634304216e-06, "loss": 0.5915, "step": 169 }, { "epoch": 0.01654501216545012, "grad_norm": 2.616126184062516, "learning_rate": 5.501618122977347e-06, "loss": 0.4252, "step": 170 }, { "epoch": 0.016642335766423356, "grad_norm": 1.9958281517625203, "learning_rate": 5.533980582524272e-06, "loss": 0.3704, "step": 171 }, { "epoch": 0.016739659367396593, "grad_norm": 2.470731302334384, "learning_rate": 5.566343042071199e-06, "loss": 0.4373, "step": 172 }, { "epoch": 0.01683698296836983, "grad_norm": 2.583270308023139, "learning_rate": 5.598705501618124e-06, "loss": 0.4125, "step": 173 }, { "epoch": 0.016934306569343065, "grad_norm": 1.9644684632241667, "learning_rate": 5.631067961165049e-06, "loss": 0.3522, "step": 174 }, { "epoch": 0.0170316301703163, "grad_norm": 2.4207097357376046, "learning_rate": 5.663430420711975e-06, "loss": 0.3579, "step": 175 }, { "epoch": 0.017128953771289538, "grad_norm": 2.3511041847292034, "learning_rate": 5.6957928802589e-06, "loss": 0.5412, "step": 176 }, { "epoch": 0.017226277372262774, "grad_norm": 2.274427899539275, "learning_rate": 5.728155339805825e-06, "loss": 0.5353, "step": 177 }, { "epoch": 0.01732360097323601, "grad_norm": 2.133749284526256, "learning_rate": 5.760517799352752e-06, "loss": 0.4392, "step": 178 }, { "epoch": 0.017420924574209247, "grad_norm": 2.3097462109285787, "learning_rate": 5.792880258899677e-06, "loss": 0.4442, "step": 179 }, { "epoch": 0.017518248175182483, "grad_norm": 2.2128802818602056, "learning_rate": 5.825242718446602e-06, "loss": 0.5635, "step": 180 }, { "epoch": 0.017615571776155716, "grad_norm": 2.103405792854256, "learning_rate": 5.857605177993528e-06, "loss": 0.4533, "step": 181 }, { "epoch": 0.017712895377128952, "grad_norm": 2.0565661990183597, "learning_rate": 5.889967637540453e-06, "loss": 0.3806, "step": 182 }, { "epoch": 0.01781021897810219, "grad_norm": 2.179649872267064, "learning_rate": 5.9223300970873785e-06, "loss": 0.3842, "step": 183 }, { "epoch": 0.017907542579075425, "grad_norm": 3.8333244047199146, "learning_rate": 5.954692556634305e-06, "loss": 0.3876, "step": 184 }, { "epoch": 0.01800486618004866, "grad_norm": 2.2893517217095716, "learning_rate": 5.9870550161812304e-06, "loss": 0.4781, "step": 185 }, { "epoch": 0.018102189781021898, "grad_norm": 1.6022498167897639, "learning_rate": 6.0194174757281556e-06, "loss": 0.2306, "step": 186 }, { "epoch": 0.018199513381995134, "grad_norm": 2.32863493589546, "learning_rate": 6.0517799352750815e-06, "loss": 0.5139, "step": 187 }, { "epoch": 0.01829683698296837, "grad_norm": 2.0789478938631314, "learning_rate": 6.084142394822007e-06, "loss": 0.2824, "step": 188 }, { "epoch": 0.018394160583941607, "grad_norm": 1.7544615955949223, "learning_rate": 6.116504854368932e-06, "loss": 0.4172, "step": 189 }, { "epoch": 0.018491484184914843, "grad_norm": 1.931043696572374, "learning_rate": 6.148867313915859e-06, "loss": 0.3584, "step": 190 }, { "epoch": 0.01858880778588808, "grad_norm": 2.467258437370788, "learning_rate": 6.181229773462784e-06, "loss": 0.462, "step": 191 }, { "epoch": 0.018686131386861315, "grad_norm": 2.1541091684996965, "learning_rate": 6.213592233009709e-06, "loss": 0.3967, "step": 192 }, { "epoch": 0.01878345498783455, "grad_norm": 2.2330486922808395, "learning_rate": 6.245954692556635e-06, "loss": 0.5316, "step": 193 }, { "epoch": 0.018880778588807785, "grad_norm": 2.3498262097642395, "learning_rate": 6.27831715210356e-06, "loss": 0.4815, "step": 194 }, { "epoch": 0.01897810218978102, "grad_norm": 1.7045092076002246, "learning_rate": 6.310679611650487e-06, "loss": 0.3, "step": 195 }, { "epoch": 0.019075425790754257, "grad_norm": 2.5703331850837023, "learning_rate": 6.343042071197412e-06, "loss": 0.4143, "step": 196 }, { "epoch": 0.019172749391727494, "grad_norm": 2.6940646171495133, "learning_rate": 6.375404530744337e-06, "loss": 0.5463, "step": 197 }, { "epoch": 0.01927007299270073, "grad_norm": 2.4185580273524847, "learning_rate": 6.407766990291263e-06, "loss": 0.5215, "step": 198 }, { "epoch": 0.019367396593673966, "grad_norm": 2.6509824694985946, "learning_rate": 6.440129449838188e-06, "loss": 0.5286, "step": 199 }, { "epoch": 0.019464720194647202, "grad_norm": 2.4807219128312767, "learning_rate": 6.472491909385113e-06, "loss": 0.3996, "step": 200 }, { "epoch": 0.01956204379562044, "grad_norm": 2.651883834043772, "learning_rate": 6.50485436893204e-06, "loss": 0.3499, "step": 201 }, { "epoch": 0.019659367396593675, "grad_norm": 2.670759179984812, "learning_rate": 6.537216828478965e-06, "loss": 0.552, "step": 202 }, { "epoch": 0.01975669099756691, "grad_norm": 2.51305850829245, "learning_rate": 6.56957928802589e-06, "loss": 0.3806, "step": 203 }, { "epoch": 0.019854014598540148, "grad_norm": 2.435954851305265, "learning_rate": 6.601941747572816e-06, "loss": 0.6093, "step": 204 }, { "epoch": 0.01995133819951338, "grad_norm": 2.091315833022872, "learning_rate": 6.6343042071197415e-06, "loss": 0.3573, "step": 205 }, { "epoch": 0.020048661800486617, "grad_norm": 2.205515437184344, "learning_rate": 6.666666666666667e-06, "loss": 0.2892, "step": 206 }, { "epoch": 0.020145985401459853, "grad_norm": 2.314981932930035, "learning_rate": 6.6990291262135935e-06, "loss": 0.4184, "step": 207 }, { "epoch": 0.02024330900243309, "grad_norm": 1.9102474885146974, "learning_rate": 6.731391585760519e-06, "loss": 0.2287, "step": 208 }, { "epoch": 0.020340632603406326, "grad_norm": 1.9408029275065433, "learning_rate": 6.763754045307444e-06, "loss": 0.3958, "step": 209 }, { "epoch": 0.020437956204379562, "grad_norm": 2.1006467731485823, "learning_rate": 6.79611650485437e-06, "loss": 0.3764, "step": 210 }, { "epoch": 0.0205352798053528, "grad_norm": 2.0927447282795146, "learning_rate": 6.828478964401295e-06, "loss": 0.531, "step": 211 }, { "epoch": 0.020632603406326035, "grad_norm": 3.4830081465453633, "learning_rate": 6.86084142394822e-06, "loss": 0.4887, "step": 212 }, { "epoch": 0.02072992700729927, "grad_norm": 2.253360993066953, "learning_rate": 6.893203883495147e-06, "loss": 0.4587, "step": 213 }, { "epoch": 0.020827250608272507, "grad_norm": 3.3751096354443852, "learning_rate": 6.925566343042072e-06, "loss": 0.3427, "step": 214 }, { "epoch": 0.020924574209245744, "grad_norm": 1.9729713112803993, "learning_rate": 6.957928802588997e-06, "loss": 0.384, "step": 215 }, { "epoch": 0.021021897810218976, "grad_norm": 2.761285167796522, "learning_rate": 6.990291262135923e-06, "loss": 0.3512, "step": 216 }, { "epoch": 0.021119221411192213, "grad_norm": 2.431882400442612, "learning_rate": 7.022653721682848e-06, "loss": 0.3971, "step": 217 }, { "epoch": 0.02121654501216545, "grad_norm": 3.659254877088116, "learning_rate": 7.055016181229773e-06, "loss": 0.4115, "step": 218 }, { "epoch": 0.021313868613138685, "grad_norm": 2.5501534359714655, "learning_rate": 7.0873786407767e-06, "loss": 0.4963, "step": 219 }, { "epoch": 0.02141119221411192, "grad_norm": 4.296894309260591, "learning_rate": 7.119741100323625e-06, "loss": 0.5203, "step": 220 }, { "epoch": 0.021508515815085158, "grad_norm": 2.5489854552137237, "learning_rate": 7.152103559870551e-06, "loss": 0.4343, "step": 221 }, { "epoch": 0.021605839416058394, "grad_norm": 2.00955207958064, "learning_rate": 7.184466019417476e-06, "loss": 0.3603, "step": 222 }, { "epoch": 0.02170316301703163, "grad_norm": 2.2675038932590224, "learning_rate": 7.2168284789644015e-06, "loss": 0.3968, "step": 223 }, { "epoch": 0.021800486618004867, "grad_norm": 2.4690586331753277, "learning_rate": 7.249190938511328e-06, "loss": 0.5883, "step": 224 }, { "epoch": 0.021897810218978103, "grad_norm": 2.141328682063472, "learning_rate": 7.2815533980582534e-06, "loss": 0.3547, "step": 225 }, { "epoch": 0.02199513381995134, "grad_norm": 2.223927434368622, "learning_rate": 7.3139158576051786e-06, "loss": 0.5031, "step": 226 }, { "epoch": 0.022092457420924576, "grad_norm": 2.8602320319532346, "learning_rate": 7.3462783171521046e-06, "loss": 0.4226, "step": 227 }, { "epoch": 0.02218978102189781, "grad_norm": 2.8852449405031835, "learning_rate": 7.37864077669903e-06, "loss": 0.4298, "step": 228 }, { "epoch": 0.022287104622871045, "grad_norm": 1.7370522944561966, "learning_rate": 7.411003236245955e-06, "loss": 0.3827, "step": 229 }, { "epoch": 0.02238442822384428, "grad_norm": 2.3907908463140584, "learning_rate": 7.443365695792882e-06, "loss": 0.4139, "step": 230 }, { "epoch": 0.022481751824817518, "grad_norm": 2.27581306432663, "learning_rate": 7.475728155339807e-06, "loss": 0.4736, "step": 231 }, { "epoch": 0.022579075425790754, "grad_norm": 2.1861094823645675, "learning_rate": 7.508090614886732e-06, "loss": 0.4809, "step": 232 }, { "epoch": 0.02267639902676399, "grad_norm": 1.9626208371421419, "learning_rate": 7.540453074433658e-06, "loss": 0.3436, "step": 233 }, { "epoch": 0.022773722627737226, "grad_norm": 1.7092390993202267, "learning_rate": 7.572815533980583e-06, "loss": 0.3224, "step": 234 }, { "epoch": 0.022871046228710463, "grad_norm": 3.0168693228526546, "learning_rate": 7.605177993527508e-06, "loss": 0.6366, "step": 235 }, { "epoch": 0.0229683698296837, "grad_norm": 2.424919921496664, "learning_rate": 7.637540453074434e-06, "loss": 0.4483, "step": 236 }, { "epoch": 0.023065693430656935, "grad_norm": 2.4586833984787626, "learning_rate": 7.66990291262136e-06, "loss": 0.4031, "step": 237 }, { "epoch": 0.02316301703163017, "grad_norm": 2.092010230715883, "learning_rate": 7.702265372168284e-06, "loss": 0.4257, "step": 238 }, { "epoch": 0.023260340632603408, "grad_norm": 2.3360188447701655, "learning_rate": 7.734627831715211e-06, "loss": 0.4684, "step": 239 }, { "epoch": 0.02335766423357664, "grad_norm": 2.087175894606599, "learning_rate": 7.766990291262136e-06, "loss": 0.4272, "step": 240 }, { "epoch": 0.023454987834549877, "grad_norm": 2.598684557686617, "learning_rate": 7.799352750809061e-06, "loss": 0.5401, "step": 241 }, { "epoch": 0.023552311435523113, "grad_norm": 2.025117037181364, "learning_rate": 7.831715210355988e-06, "loss": 0.372, "step": 242 }, { "epoch": 0.02364963503649635, "grad_norm": 2.2467324584398405, "learning_rate": 7.864077669902913e-06, "loss": 0.5891, "step": 243 }, { "epoch": 0.023746958637469586, "grad_norm": 2.38036373195977, "learning_rate": 7.896440129449839e-06, "loss": 0.5133, "step": 244 }, { "epoch": 0.023844282238442822, "grad_norm": 2.052700924442009, "learning_rate": 7.928802588996765e-06, "loss": 0.5161, "step": 245 }, { "epoch": 0.02394160583941606, "grad_norm": 3.4299018810240254, "learning_rate": 7.96116504854369e-06, "loss": 0.5314, "step": 246 }, { "epoch": 0.024038929440389295, "grad_norm": 1.3903956706369247, "learning_rate": 7.993527508090616e-06, "loss": 0.3539, "step": 247 }, { "epoch": 0.02413625304136253, "grad_norm": 2.4599878810180873, "learning_rate": 8.02588996763754e-06, "loss": 0.4876, "step": 248 }, { "epoch": 0.024233576642335768, "grad_norm": 2.4053308291912083, "learning_rate": 8.058252427184466e-06, "loss": 0.5185, "step": 249 }, { "epoch": 0.024330900243309004, "grad_norm": 1.6624263546342495, "learning_rate": 8.090614886731393e-06, "loss": 0.2909, "step": 250 }, { "epoch": 0.024428223844282237, "grad_norm": 2.4091367373679597, "learning_rate": 8.122977346278318e-06, "loss": 0.6192, "step": 251 }, { "epoch": 0.024525547445255473, "grad_norm": 2.4595313520548427, "learning_rate": 8.155339805825243e-06, "loss": 0.3444, "step": 252 }, { "epoch": 0.02462287104622871, "grad_norm": 2.3200411140153174, "learning_rate": 8.18770226537217e-06, "loss": 0.6112, "step": 253 }, { "epoch": 0.024720194647201946, "grad_norm": 2.029624875741936, "learning_rate": 8.220064724919095e-06, "loss": 0.4524, "step": 254 }, { "epoch": 0.024817518248175182, "grad_norm": 1.8862765408033388, "learning_rate": 8.25242718446602e-06, "loss": 0.2173, "step": 255 }, { "epoch": 0.02491484184914842, "grad_norm": 2.575687620331568, "learning_rate": 8.284789644012947e-06, "loss": 0.4599, "step": 256 }, { "epoch": 0.025012165450121655, "grad_norm": 2.373530485379713, "learning_rate": 8.317152103559872e-06, "loss": 0.5326, "step": 257 }, { "epoch": 0.02510948905109489, "grad_norm": 2.4086353319447262, "learning_rate": 8.349514563106797e-06, "loss": 0.6275, "step": 258 }, { "epoch": 0.025206812652068127, "grad_norm": 2.1075725625285697, "learning_rate": 8.381877022653722e-06, "loss": 0.44, "step": 259 }, { "epoch": 0.025304136253041364, "grad_norm": 2.0285700798989614, "learning_rate": 8.414239482200647e-06, "loss": 0.3489, "step": 260 }, { "epoch": 0.0254014598540146, "grad_norm": 2.5592973746241, "learning_rate": 8.446601941747573e-06, "loss": 0.4403, "step": 261 }, { "epoch": 0.025498783454987836, "grad_norm": 2.470930078509074, "learning_rate": 8.4789644012945e-06, "loss": 0.4985, "step": 262 }, { "epoch": 0.02559610705596107, "grad_norm": 2.099638103909556, "learning_rate": 8.511326860841424e-06, "loss": 0.4194, "step": 263 }, { "epoch": 0.025693430656934305, "grad_norm": 1.6030834140551835, "learning_rate": 8.54368932038835e-06, "loss": 0.3382, "step": 264 }, { "epoch": 0.02579075425790754, "grad_norm": 1.8960928547169034, "learning_rate": 8.576051779935276e-06, "loss": 0.2838, "step": 265 }, { "epoch": 0.025888077858880778, "grad_norm": 2.4306930963261966, "learning_rate": 8.608414239482202e-06, "loss": 0.4956, "step": 266 }, { "epoch": 0.025985401459854014, "grad_norm": 2.374430136325354, "learning_rate": 8.640776699029127e-06, "loss": 0.5083, "step": 267 }, { "epoch": 0.02608272506082725, "grad_norm": 2.410095115145934, "learning_rate": 8.673139158576054e-06, "loss": 0.4247, "step": 268 }, { "epoch": 0.026180048661800487, "grad_norm": 2.41271065696519, "learning_rate": 8.705501618122979e-06, "loss": 0.6946, "step": 269 }, { "epoch": 0.026277372262773723, "grad_norm": 1.752688930628829, "learning_rate": 8.737864077669904e-06, "loss": 0.2662, "step": 270 }, { "epoch": 0.02637469586374696, "grad_norm": 1.9842034213162434, "learning_rate": 8.770226537216829e-06, "loss": 0.3611, "step": 271 }, { "epoch": 0.026472019464720196, "grad_norm": 2.4137979998327497, "learning_rate": 8.802588996763754e-06, "loss": 0.501, "step": 272 }, { "epoch": 0.026569343065693432, "grad_norm": 2.929650064864996, "learning_rate": 8.834951456310681e-06, "loss": 0.6153, "step": 273 }, { "epoch": 0.02666666666666667, "grad_norm": 2.281738020025263, "learning_rate": 8.867313915857606e-06, "loss": 0.5395, "step": 274 }, { "epoch": 0.0267639902676399, "grad_norm": 2.1406726692627975, "learning_rate": 8.899676375404531e-06, "loss": 0.4039, "step": 275 }, { "epoch": 0.026861313868613138, "grad_norm": 3.2366954201371523, "learning_rate": 8.932038834951458e-06, "loss": 0.5414, "step": 276 }, { "epoch": 0.026958637469586374, "grad_norm": 2.1900667662872513, "learning_rate": 8.964401294498383e-06, "loss": 0.3815, "step": 277 }, { "epoch": 0.02705596107055961, "grad_norm": 2.5301939091612216, "learning_rate": 8.996763754045308e-06, "loss": 0.8016, "step": 278 }, { "epoch": 0.027153284671532846, "grad_norm": 2.2552758985680907, "learning_rate": 9.029126213592233e-06, "loss": 0.4133, "step": 279 }, { "epoch": 0.027250608272506083, "grad_norm": 2.309545536997134, "learning_rate": 9.06148867313916e-06, "loss": 0.5346, "step": 280 }, { "epoch": 0.02734793187347932, "grad_norm": 2.585578916644781, "learning_rate": 9.093851132686085e-06, "loss": 0.407, "step": 281 }, { "epoch": 0.027445255474452555, "grad_norm": 1.8503464194025006, "learning_rate": 9.12621359223301e-06, "loss": 0.4674, "step": 282 }, { "epoch": 0.02754257907542579, "grad_norm": 2.431490115980846, "learning_rate": 9.158576051779936e-06, "loss": 0.6026, "step": 283 }, { "epoch": 0.027639902676399028, "grad_norm": 1.916233248702735, "learning_rate": 9.19093851132686e-06, "loss": 0.4949, "step": 284 }, { "epoch": 0.027737226277372264, "grad_norm": 2.2160236640245072, "learning_rate": 9.223300970873788e-06, "loss": 0.4765, "step": 285 }, { "epoch": 0.027834549878345497, "grad_norm": 2.0764827118780143, "learning_rate": 9.255663430420713e-06, "loss": 0.472, "step": 286 }, { "epoch": 0.027931873479318733, "grad_norm": 2.638286661284288, "learning_rate": 9.288025889967638e-06, "loss": 0.6312, "step": 287 }, { "epoch": 0.02802919708029197, "grad_norm": 1.940011273577467, "learning_rate": 9.320388349514565e-06, "loss": 0.4555, "step": 288 }, { "epoch": 0.028126520681265206, "grad_norm": 1.8760624736314784, "learning_rate": 9.35275080906149e-06, "loss": 0.3625, "step": 289 }, { "epoch": 0.028223844282238442, "grad_norm": 1.3468692859077058, "learning_rate": 9.385113268608415e-06, "loss": 0.2442, "step": 290 }, { "epoch": 0.02832116788321168, "grad_norm": 2.1497394847504014, "learning_rate": 9.41747572815534e-06, "loss": 0.5227, "step": 291 }, { "epoch": 0.028418491484184915, "grad_norm": 2.1233743171190014, "learning_rate": 9.449838187702267e-06, "loss": 0.6184, "step": 292 }, { "epoch": 0.02851581508515815, "grad_norm": 2.337806183860394, "learning_rate": 9.482200647249192e-06, "loss": 0.5491, "step": 293 }, { "epoch": 0.028613138686131388, "grad_norm": 2.015000594070385, "learning_rate": 9.514563106796117e-06, "loss": 0.5137, "step": 294 }, { "epoch": 0.028710462287104624, "grad_norm": 2.0267324830753766, "learning_rate": 9.546925566343042e-06, "loss": 0.4117, "step": 295 }, { "epoch": 0.02880778588807786, "grad_norm": 1.732639028192012, "learning_rate": 9.579288025889967e-06, "loss": 0.3156, "step": 296 }, { "epoch": 0.028905109489051097, "grad_norm": 2.1204056159243923, "learning_rate": 9.611650485436894e-06, "loss": 0.6056, "step": 297 }, { "epoch": 0.02900243309002433, "grad_norm": 1.7868071753968195, "learning_rate": 9.64401294498382e-06, "loss": 0.3417, "step": 298 }, { "epoch": 0.029099756690997566, "grad_norm": 1.9477439300595292, "learning_rate": 9.676375404530746e-06, "loss": 0.3631, "step": 299 }, { "epoch": 0.029197080291970802, "grad_norm": 1.7688147839655162, "learning_rate": 9.708737864077671e-06, "loss": 0.3605, "step": 300 }, { "epoch": 0.029294403892944038, "grad_norm": 1.9162335597538034, "learning_rate": 9.741100323624596e-06, "loss": 0.2498, "step": 301 }, { "epoch": 0.029391727493917275, "grad_norm": 2.9282579520055756, "learning_rate": 9.773462783171522e-06, "loss": 0.4286, "step": 302 }, { "epoch": 0.02948905109489051, "grad_norm": 1.9744499285549086, "learning_rate": 9.805825242718447e-06, "loss": 0.3391, "step": 303 }, { "epoch": 0.029586374695863747, "grad_norm": 2.2116032868392455, "learning_rate": 9.838187702265373e-06, "loss": 0.3414, "step": 304 }, { "epoch": 0.029683698296836983, "grad_norm": 1.9159144570242486, "learning_rate": 9.870550161812299e-06, "loss": 0.2915, "step": 305 }, { "epoch": 0.02978102189781022, "grad_norm": 2.671718838238437, "learning_rate": 9.902912621359224e-06, "loss": 0.79, "step": 306 }, { "epoch": 0.029878345498783456, "grad_norm": 2.093937424199301, "learning_rate": 9.935275080906149e-06, "loss": 0.576, "step": 307 }, { "epoch": 0.029975669099756692, "grad_norm": 1.895574286512308, "learning_rate": 9.967637540453076e-06, "loss": 0.4223, "step": 308 }, { "epoch": 0.03007299270072993, "grad_norm": 2.142643554578675, "learning_rate": 1e-05, "loss": 0.4719, "step": 309 }, { "epoch": 0.03017031630170316, "grad_norm": 2.2548613483238378, "learning_rate": 9.999999751573464e-06, "loss": 0.5547, "step": 310 }, { "epoch": 0.030267639902676398, "grad_norm": 2.375146158639999, "learning_rate": 9.99999900629388e-06, "loss": 0.3864, "step": 311 }, { "epoch": 0.030364963503649634, "grad_norm": 1.558937895217452, "learning_rate": 9.99999776416132e-06, "loss": 0.3409, "step": 312 }, { "epoch": 0.03046228710462287, "grad_norm": 2.7508940543848115, "learning_rate": 9.99999602517591e-06, "loss": 0.3641, "step": 313 }, { "epoch": 0.030559610705596107, "grad_norm": 2.228096737889712, "learning_rate": 9.99999378933782e-06, "loss": 0.6464, "step": 314 }, { "epoch": 0.030656934306569343, "grad_norm": 1.5612763763472, "learning_rate": 9.999991056647274e-06, "loss": 0.3124, "step": 315 }, { "epoch": 0.03075425790754258, "grad_norm": 2.3203527787434104, "learning_rate": 9.999987827104544e-06, "loss": 0.5893, "step": 316 }, { "epoch": 0.030851581508515816, "grad_norm": 1.8472611567410342, "learning_rate": 9.999984100709951e-06, "loss": 0.3732, "step": 317 }, { "epoch": 0.030948905109489052, "grad_norm": 2.269778108549014, "learning_rate": 9.999979877463866e-06, "loss": 0.5537, "step": 318 }, { "epoch": 0.03104622871046229, "grad_norm": 2.381498581134022, "learning_rate": 9.999975157366705e-06, "loss": 0.7179, "step": 319 }, { "epoch": 0.031143552311435525, "grad_norm": 1.7030655036823346, "learning_rate": 9.99996994041894e-06, "loss": 0.4256, "step": 320 }, { "epoch": 0.031240875912408757, "grad_norm": 1.8361141038730153, "learning_rate": 9.999964226621089e-06, "loss": 0.4648, "step": 321 }, { "epoch": 0.031338199513381994, "grad_norm": 1.7985459229558753, "learning_rate": 9.99995801597372e-06, "loss": 0.3031, "step": 322 }, { "epoch": 0.031435523114355234, "grad_norm": 2.4309020119442915, "learning_rate": 9.99995130847745e-06, "loss": 0.5011, "step": 323 }, { "epoch": 0.031532846715328466, "grad_norm": 2.048514022969095, "learning_rate": 9.999944104132944e-06, "loss": 0.6152, "step": 324 }, { "epoch": 0.031630170316301706, "grad_norm": 1.8892667320795724, "learning_rate": 9.99993640294092e-06, "loss": 0.4738, "step": 325 }, { "epoch": 0.03172749391727494, "grad_norm": 2.081179331819785, "learning_rate": 9.999928204902141e-06, "loss": 0.5192, "step": 326 }, { "epoch": 0.03182481751824817, "grad_norm": 2.410280889073595, "learning_rate": 9.999919510017424e-06, "loss": 0.3314, "step": 327 }, { "epoch": 0.03192214111922141, "grad_norm": 1.663034255724975, "learning_rate": 9.999910318287632e-06, "loss": 0.3342, "step": 328 }, { "epoch": 0.032019464720194644, "grad_norm": 1.7874391345352068, "learning_rate": 9.999900629713679e-06, "loss": 0.3189, "step": 329 }, { "epoch": 0.032116788321167884, "grad_norm": 2.1098429973097805, "learning_rate": 9.999890444296528e-06, "loss": 0.4561, "step": 330 }, { "epoch": 0.03221411192214112, "grad_norm": 2.4678279265353558, "learning_rate": 9.999879762037187e-06, "loss": 0.5831, "step": 331 }, { "epoch": 0.03231143552311436, "grad_norm": 1.6643716587630457, "learning_rate": 9.999868582936726e-06, "loss": 0.4371, "step": 332 }, { "epoch": 0.03240875912408759, "grad_norm": 2.088466639768523, "learning_rate": 9.999856906996246e-06, "loss": 0.3904, "step": 333 }, { "epoch": 0.03250608272506083, "grad_norm": 2.0023651443392256, "learning_rate": 9.999844734216914e-06, "loss": 0.4802, "step": 334 }, { "epoch": 0.03260340632603406, "grad_norm": 2.161282844007076, "learning_rate": 9.99983206459994e-06, "loss": 0.5187, "step": 335 }, { "epoch": 0.0327007299270073, "grad_norm": 2.10212671583593, "learning_rate": 9.999818898146576e-06, "loss": 0.4618, "step": 336 }, { "epoch": 0.032798053527980535, "grad_norm": 2.2142899508809286, "learning_rate": 9.999805234858137e-06, "loss": 0.2387, "step": 337 }, { "epoch": 0.032895377128953775, "grad_norm": 2.1084763484693023, "learning_rate": 9.999791074735981e-06, "loss": 0.5652, "step": 338 }, { "epoch": 0.03299270072992701, "grad_norm": 2.261838498017328, "learning_rate": 9.99977641778151e-06, "loss": 0.7224, "step": 339 }, { "epoch": 0.03309002433090024, "grad_norm": 1.612816030006559, "learning_rate": 9.999761263996184e-06, "loss": 0.377, "step": 340 }, { "epoch": 0.03318734793187348, "grad_norm": 2.1209830295615832, "learning_rate": 9.999745613381507e-06, "loss": 0.614, "step": 341 }, { "epoch": 0.03328467153284671, "grad_norm": 1.7938764015879674, "learning_rate": 9.999729465939036e-06, "loss": 0.3983, "step": 342 }, { "epoch": 0.03338199513381995, "grad_norm": 1.943418875698731, "learning_rate": 9.999712821670375e-06, "loss": 0.4708, "step": 343 }, { "epoch": 0.033479318734793186, "grad_norm": 1.9787546900237571, "learning_rate": 9.99969568057718e-06, "loss": 0.578, "step": 344 }, { "epoch": 0.033576642335766425, "grad_norm": 1.4798263726328331, "learning_rate": 9.99967804266115e-06, "loss": 0.394, "step": 345 }, { "epoch": 0.03367396593673966, "grad_norm": 2.1936298043995484, "learning_rate": 9.99965990792404e-06, "loss": 0.6316, "step": 346 }, { "epoch": 0.0337712895377129, "grad_norm": 2.2799650780195133, "learning_rate": 9.99964127636765e-06, "loss": 0.3985, "step": 347 }, { "epoch": 0.03386861313868613, "grad_norm": 1.8519049219191819, "learning_rate": 9.999622147993837e-06, "loss": 0.3853, "step": 348 }, { "epoch": 0.03396593673965937, "grad_norm": 1.5111895282974241, "learning_rate": 9.999602522804497e-06, "loss": 0.4201, "step": 349 }, { "epoch": 0.0340632603406326, "grad_norm": 1.8605769784283237, "learning_rate": 9.99958240080158e-06, "loss": 0.5225, "step": 350 }, { "epoch": 0.034160583941605836, "grad_norm": 1.6063240538866903, "learning_rate": 9.999561781987087e-06, "loss": 0.3165, "step": 351 }, { "epoch": 0.034257907542579076, "grad_norm": 1.4751976204077173, "learning_rate": 9.999540666363068e-06, "loss": 0.3156, "step": 352 }, { "epoch": 0.03435523114355231, "grad_norm": 2.1029966771511757, "learning_rate": 9.99951905393162e-06, "loss": 0.5336, "step": 353 }, { "epoch": 0.03445255474452555, "grad_norm": 2.1419054642874267, "learning_rate": 9.99949694469489e-06, "loss": 0.5253, "step": 354 }, { "epoch": 0.03454987834549878, "grad_norm": 2.169397271826959, "learning_rate": 9.999474338655075e-06, "loss": 0.5567, "step": 355 }, { "epoch": 0.03464720194647202, "grad_norm": 2.2972412855327797, "learning_rate": 9.999451235814422e-06, "loss": 0.5233, "step": 356 }, { "epoch": 0.034744525547445254, "grad_norm": 1.830377999961128, "learning_rate": 9.999427636175228e-06, "loss": 0.4297, "step": 357 }, { "epoch": 0.034841849148418494, "grad_norm": 2.1217123292302875, "learning_rate": 9.999403539739837e-06, "loss": 0.3605, "step": 358 }, { "epoch": 0.03493917274939173, "grad_norm": 2.001599625802253, "learning_rate": 9.999378946510642e-06, "loss": 0.5237, "step": 359 }, { "epoch": 0.035036496350364967, "grad_norm": 1.6719956399048532, "learning_rate": 9.99935385649009e-06, "loss": 0.424, "step": 360 }, { "epoch": 0.0351338199513382, "grad_norm": 1.5962062682133515, "learning_rate": 9.99932826968067e-06, "loss": 0.4228, "step": 361 }, { "epoch": 0.03523114355231143, "grad_norm": 1.9754274750693919, "learning_rate": 9.999302186084929e-06, "loss": 0.4333, "step": 362 }, { "epoch": 0.03532846715328467, "grad_norm": 1.8248617929879183, "learning_rate": 9.999275605705457e-06, "loss": 0.4985, "step": 363 }, { "epoch": 0.035425790754257905, "grad_norm": 2.5923075514224982, "learning_rate": 9.999248528544895e-06, "loss": 0.4829, "step": 364 }, { "epoch": 0.035523114355231145, "grad_norm": 1.9900801938638135, "learning_rate": 9.999220954605932e-06, "loss": 0.587, "step": 365 }, { "epoch": 0.03562043795620438, "grad_norm": 1.731558772897005, "learning_rate": 9.999192883891314e-06, "loss": 0.3299, "step": 366 }, { "epoch": 0.03571776155717762, "grad_norm": 2.339577788711278, "learning_rate": 9.999164316403823e-06, "loss": 0.4845, "step": 367 }, { "epoch": 0.03581508515815085, "grad_norm": 1.9784113864985955, "learning_rate": 9.999135252146302e-06, "loss": 0.5776, "step": 368 }, { "epoch": 0.03591240875912409, "grad_norm": 1.5555461256937277, "learning_rate": 9.999105691121638e-06, "loss": 0.3563, "step": 369 }, { "epoch": 0.03600973236009732, "grad_norm": 1.7905677559908044, "learning_rate": 9.99907563333277e-06, "loss": 0.546, "step": 370 }, { "epoch": 0.03610705596107056, "grad_norm": 2.0490894714600287, "learning_rate": 9.999045078782684e-06, "loss": 0.6836, "step": 371 }, { "epoch": 0.036204379562043795, "grad_norm": 2.216601446334751, "learning_rate": 9.999014027474413e-06, "loss": 0.5237, "step": 372 }, { "epoch": 0.036301703163017035, "grad_norm": 1.5937926342815392, "learning_rate": 9.998982479411047e-06, "loss": 0.3539, "step": 373 }, { "epoch": 0.03639902676399027, "grad_norm": 2.3941848280266864, "learning_rate": 9.99895043459572e-06, "loss": 0.6249, "step": 374 }, { "epoch": 0.0364963503649635, "grad_norm": 2.072859669066288, "learning_rate": 9.998917893031615e-06, "loss": 0.5415, "step": 375 }, { "epoch": 0.03659367396593674, "grad_norm": 1.670908711065728, "learning_rate": 9.998884854721968e-06, "loss": 0.3034, "step": 376 }, { "epoch": 0.03669099756690997, "grad_norm": 1.9880303784818283, "learning_rate": 9.998851319670057e-06, "loss": 0.5025, "step": 377 }, { "epoch": 0.03678832116788321, "grad_norm": 1.3517666701087396, "learning_rate": 9.99881728787922e-06, "loss": 0.2775, "step": 378 }, { "epoch": 0.036885644768856446, "grad_norm": 1.8952553535268069, "learning_rate": 9.998782759352839e-06, "loss": 0.5306, "step": 379 }, { "epoch": 0.036982968369829686, "grad_norm": 1.8730537486024816, "learning_rate": 9.998747734094338e-06, "loss": 0.386, "step": 380 }, { "epoch": 0.03708029197080292, "grad_norm": 2.058996056292158, "learning_rate": 9.998712212107205e-06, "loss": 0.5641, "step": 381 }, { "epoch": 0.03717761557177616, "grad_norm": 1.9837834234853275, "learning_rate": 9.998676193394966e-06, "loss": 0.2628, "step": 382 }, { "epoch": 0.03727493917274939, "grad_norm": 2.189700953999047, "learning_rate": 9.998639677961203e-06, "loss": 0.6024, "step": 383 }, { "epoch": 0.03737226277372263, "grad_norm": 2.060696593716547, "learning_rate": 9.99860266580954e-06, "loss": 0.5377, "step": 384 }, { "epoch": 0.037469586374695864, "grad_norm": 2.0831966609629227, "learning_rate": 9.99856515694366e-06, "loss": 0.5063, "step": 385 }, { "epoch": 0.0375669099756691, "grad_norm": 2.2950496556846227, "learning_rate": 9.998527151367288e-06, "loss": 0.6484, "step": 386 }, { "epoch": 0.037664233576642336, "grad_norm": 2.2597922123273873, "learning_rate": 9.9984886490842e-06, "loss": 0.6617, "step": 387 }, { "epoch": 0.03776155717761557, "grad_norm": 2.071575887731456, "learning_rate": 9.99844965009822e-06, "loss": 0.5405, "step": 388 }, { "epoch": 0.03785888077858881, "grad_norm": 2.004249587957457, "learning_rate": 9.99841015441323e-06, "loss": 0.4306, "step": 389 }, { "epoch": 0.03795620437956204, "grad_norm": 1.9297023880727862, "learning_rate": 9.99837016203315e-06, "loss": 0.4083, "step": 390 }, { "epoch": 0.03805352798053528, "grad_norm": 2.001337081282171, "learning_rate": 9.998329672961952e-06, "loss": 0.4999, "step": 391 }, { "epoch": 0.038150851581508514, "grad_norm": 1.7630230797021285, "learning_rate": 9.998288687203665e-06, "loss": 0.4267, "step": 392 }, { "epoch": 0.038248175182481754, "grad_norm": 1.4413546421147376, "learning_rate": 9.998247204762358e-06, "loss": 0.3028, "step": 393 }, { "epoch": 0.03834549878345499, "grad_norm": 2.032450629241147, "learning_rate": 9.998205225642154e-06, "loss": 0.4216, "step": 394 }, { "epoch": 0.03844282238442823, "grad_norm": 1.8288270303352272, "learning_rate": 9.998162749847224e-06, "loss": 0.451, "step": 395 }, { "epoch": 0.03854014598540146, "grad_norm": 1.5869427581540143, "learning_rate": 9.998119777381791e-06, "loss": 0.4896, "step": 396 }, { "epoch": 0.03863746958637469, "grad_norm": 1.9312614168983935, "learning_rate": 9.998076308250122e-06, "loss": 0.351, "step": 397 }, { "epoch": 0.03873479318734793, "grad_norm": 2.182734939846557, "learning_rate": 9.99803234245654e-06, "loss": 0.4456, "step": 398 }, { "epoch": 0.038832116788321165, "grad_norm": 1.6075130172605856, "learning_rate": 9.997987880005412e-06, "loss": 0.3333, "step": 399 }, { "epoch": 0.038929440389294405, "grad_norm": 2.0206579020801048, "learning_rate": 9.997942920901154e-06, "loss": 0.4662, "step": 400 }, { "epoch": 0.03902676399026764, "grad_norm": 2.0019154912621246, "learning_rate": 9.997897465148236e-06, "loss": 0.588, "step": 401 }, { "epoch": 0.03912408759124088, "grad_norm": 1.9556688755730123, "learning_rate": 9.997851512751178e-06, "loss": 0.5364, "step": 402 }, { "epoch": 0.03922141119221411, "grad_norm": 2.1735940620422687, "learning_rate": 9.997805063714541e-06, "loss": 0.4155, "step": 403 }, { "epoch": 0.03931873479318735, "grad_norm": 1.893104755523836, "learning_rate": 9.997758118042945e-06, "loss": 0.2835, "step": 404 }, { "epoch": 0.03941605839416058, "grad_norm": 1.892857392200546, "learning_rate": 9.99771067574105e-06, "loss": 0.317, "step": 405 }, { "epoch": 0.03951338199513382, "grad_norm": 2.194365925195629, "learning_rate": 9.997662736813575e-06, "loss": 0.5972, "step": 406 }, { "epoch": 0.039610705596107056, "grad_norm": 2.3359516870584547, "learning_rate": 9.997614301265281e-06, "loss": 0.3505, "step": 407 }, { "epoch": 0.039708029197080295, "grad_norm": 1.8041349283411827, "learning_rate": 9.997565369100983e-06, "loss": 0.4003, "step": 408 }, { "epoch": 0.03980535279805353, "grad_norm": 2.2199870140108273, "learning_rate": 9.997515940325542e-06, "loss": 0.4428, "step": 409 }, { "epoch": 0.03990267639902676, "grad_norm": 2.193796849633566, "learning_rate": 9.997466014943871e-06, "loss": 0.3906, "step": 410 }, { "epoch": 0.04, "grad_norm": 2.7309920828616168, "learning_rate": 9.99741559296093e-06, "loss": 0.6283, "step": 411 }, { "epoch": 0.040097323600973234, "grad_norm": 2.220745639846989, "learning_rate": 9.99736467438173e-06, "loss": 0.4568, "step": 412 }, { "epoch": 0.04019464720194647, "grad_norm": 1.905067765139487, "learning_rate": 9.99731325921133e-06, "loss": 0.3198, "step": 413 }, { "epoch": 0.040291970802919706, "grad_norm": 2.0461180940034116, "learning_rate": 9.997261347454841e-06, "loss": 0.3783, "step": 414 }, { "epoch": 0.040389294403892946, "grad_norm": 1.9732614929529544, "learning_rate": 9.99720893911742e-06, "loss": 0.5211, "step": 415 }, { "epoch": 0.04048661800486618, "grad_norm": 2.341156401873798, "learning_rate": 9.997156034204276e-06, "loss": 0.5094, "step": 416 }, { "epoch": 0.04058394160583942, "grad_norm": 2.2588135503158138, "learning_rate": 9.997102632720664e-06, "loss": 0.591, "step": 417 }, { "epoch": 0.04068126520681265, "grad_norm": 2.187795564574772, "learning_rate": 9.997048734671893e-06, "loss": 0.3811, "step": 418 }, { "epoch": 0.04077858880778589, "grad_norm": 2.2570398189900938, "learning_rate": 9.996994340063314e-06, "loss": 0.4494, "step": 419 }, { "epoch": 0.040875912408759124, "grad_norm": 2.3267878846596597, "learning_rate": 9.996939448900341e-06, "loss": 0.5254, "step": 420 }, { "epoch": 0.04097323600973236, "grad_norm": 1.9149387144635641, "learning_rate": 9.99688406118842e-06, "loss": 0.4281, "step": 421 }, { "epoch": 0.0410705596107056, "grad_norm": 2.4052095021382285, "learning_rate": 9.996828176933062e-06, "loss": 0.61, "step": 422 }, { "epoch": 0.04116788321167883, "grad_norm": 2.8744864627123237, "learning_rate": 9.996771796139814e-06, "loss": 0.4708, "step": 423 }, { "epoch": 0.04126520681265207, "grad_norm": 2.0334953222734513, "learning_rate": 9.996714918814284e-06, "loss": 0.2697, "step": 424 }, { "epoch": 0.0413625304136253, "grad_norm": 2.1314093477075486, "learning_rate": 9.996657544962119e-06, "loss": 0.3026, "step": 425 }, { "epoch": 0.04145985401459854, "grad_norm": 1.7241742631767316, "learning_rate": 9.996599674589022e-06, "loss": 0.3624, "step": 426 }, { "epoch": 0.041557177615571775, "grad_norm": 2.417754377928955, "learning_rate": 9.996541307700746e-06, "loss": 0.6682, "step": 427 }, { "epoch": 0.041654501216545015, "grad_norm": 2.2126055245100256, "learning_rate": 9.99648244430309e-06, "loss": 0.3705, "step": 428 }, { "epoch": 0.04175182481751825, "grad_norm": 1.8224510106748588, "learning_rate": 9.996423084401901e-06, "loss": 0.4318, "step": 429 }, { "epoch": 0.04184914841849149, "grad_norm": 1.6786428352287364, "learning_rate": 9.996363228003079e-06, "loss": 0.4662, "step": 430 }, { "epoch": 0.04194647201946472, "grad_norm": 1.9342922605897592, "learning_rate": 9.99630287511257e-06, "loss": 0.4874, "step": 431 }, { "epoch": 0.04204379562043795, "grad_norm": 1.9444011100602645, "learning_rate": 9.996242025736377e-06, "loss": 0.3711, "step": 432 }, { "epoch": 0.04214111922141119, "grad_norm": 3.114184163688958, "learning_rate": 9.99618067988054e-06, "loss": 0.5342, "step": 433 }, { "epoch": 0.042238442822384425, "grad_norm": 1.993932460938173, "learning_rate": 9.99611883755116e-06, "loss": 0.465, "step": 434 }, { "epoch": 0.042335766423357665, "grad_norm": 1.5062408953506277, "learning_rate": 9.99605649875438e-06, "loss": 0.3862, "step": 435 }, { "epoch": 0.0424330900243309, "grad_norm": 2.5287447175721733, "learning_rate": 9.995993663496394e-06, "loss": 0.5638, "step": 436 }, { "epoch": 0.04253041362530414, "grad_norm": 1.7215400937807486, "learning_rate": 9.995930331783448e-06, "loss": 0.3507, "step": 437 }, { "epoch": 0.04262773722627737, "grad_norm": 1.5105936757865817, "learning_rate": 9.995866503621834e-06, "loss": 0.4086, "step": 438 }, { "epoch": 0.04272506082725061, "grad_norm": 1.828501540310894, "learning_rate": 9.995802179017893e-06, "loss": 0.3477, "step": 439 }, { "epoch": 0.04282238442822384, "grad_norm": 1.6658361590948114, "learning_rate": 9.995737357978022e-06, "loss": 0.4006, "step": 440 }, { "epoch": 0.04291970802919708, "grad_norm": 1.6434395036324305, "learning_rate": 9.995672040508656e-06, "loss": 0.4349, "step": 441 }, { "epoch": 0.043017031630170316, "grad_norm": 1.9913424027071125, "learning_rate": 9.99560622661629e-06, "loss": 0.3415, "step": 442 }, { "epoch": 0.043114355231143556, "grad_norm": 1.6487474195389296, "learning_rate": 9.995539916307463e-06, "loss": 0.4804, "step": 443 }, { "epoch": 0.04321167883211679, "grad_norm": 1.4861266391850032, "learning_rate": 9.995473109588764e-06, "loss": 0.411, "step": 444 }, { "epoch": 0.04330900243309002, "grad_norm": 1.4390762643228305, "learning_rate": 9.995405806466831e-06, "loss": 0.3806, "step": 445 }, { "epoch": 0.04340632603406326, "grad_norm": 1.7775332171720517, "learning_rate": 9.995338006948353e-06, "loss": 0.3332, "step": 446 }, { "epoch": 0.043503649635036494, "grad_norm": 1.7312883283317864, "learning_rate": 9.995269711040067e-06, "loss": 0.2736, "step": 447 }, { "epoch": 0.043600973236009734, "grad_norm": 1.7973901424872405, "learning_rate": 9.995200918748759e-06, "loss": 0.5597, "step": 448 }, { "epoch": 0.04369829683698297, "grad_norm": 2.0409413301370334, "learning_rate": 9.995131630081265e-06, "loss": 0.6045, "step": 449 }, { "epoch": 0.043795620437956206, "grad_norm": 3.2708903670147347, "learning_rate": 9.995061845044473e-06, "loss": 0.6245, "step": 450 }, { "epoch": 0.04389294403892944, "grad_norm": 1.744466889932859, "learning_rate": 9.994991563645314e-06, "loss": 0.4129, "step": 451 }, { "epoch": 0.04399026763990268, "grad_norm": 1.8775864246251477, "learning_rate": 9.994920785890771e-06, "loss": 0.414, "step": 452 }, { "epoch": 0.04408759124087591, "grad_norm": 1.3868286948878126, "learning_rate": 9.994849511787881e-06, "loss": 0.3164, "step": 453 }, { "epoch": 0.04418491484184915, "grad_norm": 1.6888257223301795, "learning_rate": 9.994777741343727e-06, "loss": 0.3241, "step": 454 }, { "epoch": 0.044282238442822384, "grad_norm": 1.5029594314338663, "learning_rate": 9.994705474565436e-06, "loss": 0.4148, "step": 455 }, { "epoch": 0.04437956204379562, "grad_norm": 1.7159996915963702, "learning_rate": 9.994632711460193e-06, "loss": 0.3387, "step": 456 }, { "epoch": 0.04447688564476886, "grad_norm": 1.7717997513120352, "learning_rate": 9.994559452035228e-06, "loss": 0.4547, "step": 457 }, { "epoch": 0.04457420924574209, "grad_norm": 1.887765282184233, "learning_rate": 9.99448569629782e-06, "loss": 0.5919, "step": 458 }, { "epoch": 0.04467153284671533, "grad_norm": 2.0151049512314585, "learning_rate": 9.994411444255298e-06, "loss": 0.4556, "step": 459 }, { "epoch": 0.04476885644768856, "grad_norm": 1.5706463359289826, "learning_rate": 9.994336695915041e-06, "loss": 0.3443, "step": 460 }, { "epoch": 0.0448661800486618, "grad_norm": 1.9067884841542395, "learning_rate": 9.994261451284477e-06, "loss": 0.5862, "step": 461 }, { "epoch": 0.044963503649635035, "grad_norm": 1.7346846845298518, "learning_rate": 9.994185710371083e-06, "loss": 0.3588, "step": 462 }, { "epoch": 0.045060827250608275, "grad_norm": 1.5593715629463312, "learning_rate": 9.994109473182385e-06, "loss": 0.2891, "step": 463 }, { "epoch": 0.04515815085158151, "grad_norm": 2.326736753149576, "learning_rate": 9.994032739725959e-06, "loss": 0.6517, "step": 464 }, { "epoch": 0.04525547445255475, "grad_norm": 2.2142852132770305, "learning_rate": 9.99395551000943e-06, "loss": 0.3571, "step": 465 }, { "epoch": 0.04535279805352798, "grad_norm": 1.7351954813390544, "learning_rate": 9.993877784040474e-06, "loss": 0.3849, "step": 466 }, { "epoch": 0.04545012165450121, "grad_norm": 1.3962336815381617, "learning_rate": 9.993799561826811e-06, "loss": 0.311, "step": 467 }, { "epoch": 0.04554744525547445, "grad_norm": 1.878958465421645, "learning_rate": 9.993720843376216e-06, "loss": 0.5602, "step": 468 }, { "epoch": 0.045644768856447686, "grad_norm": 1.519160992933857, "learning_rate": 9.993641628696513e-06, "loss": 0.2379, "step": 469 }, { "epoch": 0.045742092457420926, "grad_norm": 2.5345930464298885, "learning_rate": 9.99356191779557e-06, "loss": 0.4239, "step": 470 }, { "epoch": 0.04583941605839416, "grad_norm": 1.3153911718041251, "learning_rate": 9.993481710681314e-06, "loss": 0.3454, "step": 471 }, { "epoch": 0.0459367396593674, "grad_norm": 2.16208125563947, "learning_rate": 9.993401007361707e-06, "loss": 0.5386, "step": 472 }, { "epoch": 0.04603406326034063, "grad_norm": 1.8150842593472827, "learning_rate": 9.993319807844775e-06, "loss": 0.3077, "step": 473 }, { "epoch": 0.04613138686131387, "grad_norm": 1.6656864462678063, "learning_rate": 9.993238112138584e-06, "loss": 0.4927, "step": 474 }, { "epoch": 0.046228710462287104, "grad_norm": 1.3429917702468868, "learning_rate": 9.993155920251252e-06, "loss": 0.2433, "step": 475 }, { "epoch": 0.04632603406326034, "grad_norm": 1.3651155739367906, "learning_rate": 9.993073232190949e-06, "loss": 0.2947, "step": 476 }, { "epoch": 0.046423357664233576, "grad_norm": 1.7815516701613203, "learning_rate": 9.992990047965887e-06, "loss": 0.5372, "step": 477 }, { "epoch": 0.046520681265206816, "grad_norm": 1.846696342179327, "learning_rate": 9.992906367584337e-06, "loss": 0.5127, "step": 478 }, { "epoch": 0.04661800486618005, "grad_norm": 1.7511253825578088, "learning_rate": 9.992822191054612e-06, "loss": 0.4074, "step": 479 }, { "epoch": 0.04671532846715328, "grad_norm": 1.8105635986872588, "learning_rate": 9.992737518385076e-06, "loss": 0.4998, "step": 480 }, { "epoch": 0.04681265206812652, "grad_norm": 2.2743597617900746, "learning_rate": 9.992652349584147e-06, "loss": 0.6249, "step": 481 }, { "epoch": 0.046909975669099754, "grad_norm": 1.93948496382319, "learning_rate": 9.992566684660282e-06, "loss": 0.5411, "step": 482 }, { "epoch": 0.047007299270072994, "grad_norm": 1.4073760716303516, "learning_rate": 9.992480523621999e-06, "loss": 0.3506, "step": 483 }, { "epoch": 0.04710462287104623, "grad_norm": 1.388293079160528, "learning_rate": 9.992393866477856e-06, "loss": 0.3304, "step": 484 }, { "epoch": 0.04720194647201947, "grad_norm": 2.082643572745618, "learning_rate": 9.992306713236467e-06, "loss": 0.5653, "step": 485 }, { "epoch": 0.0472992700729927, "grad_norm": 1.7104664332606834, "learning_rate": 9.992219063906492e-06, "loss": 0.3317, "step": 486 }, { "epoch": 0.04739659367396594, "grad_norm": 1.7575848919482624, "learning_rate": 9.992130918496638e-06, "loss": 0.4109, "step": 487 }, { "epoch": 0.04749391727493917, "grad_norm": 1.7351379091271637, "learning_rate": 9.992042277015668e-06, "loss": 0.5065, "step": 488 }, { "epoch": 0.04759124087591241, "grad_norm": 1.4444570948381004, "learning_rate": 9.991953139472387e-06, "loss": 0.4023, "step": 489 }, { "epoch": 0.047688564476885645, "grad_norm": 1.4697709289140384, "learning_rate": 9.991863505875656e-06, "loss": 0.3364, "step": 490 }, { "epoch": 0.04778588807785888, "grad_norm": 1.9428205960506804, "learning_rate": 9.99177337623438e-06, "loss": 0.4303, "step": 491 }, { "epoch": 0.04788321167883212, "grad_norm": 1.931152158561148, "learning_rate": 9.991682750557516e-06, "loss": 0.2857, "step": 492 }, { "epoch": 0.04798053527980535, "grad_norm": 1.9301394655308035, "learning_rate": 9.991591628854067e-06, "loss": 0.5998, "step": 493 }, { "epoch": 0.04807785888077859, "grad_norm": 1.7788293016868693, "learning_rate": 9.99150001113309e-06, "loss": 0.4595, "step": 494 }, { "epoch": 0.04817518248175182, "grad_norm": 2.0641225732440134, "learning_rate": 9.99140789740369e-06, "loss": 0.3848, "step": 495 }, { "epoch": 0.04827250608272506, "grad_norm": 2.2832955373527044, "learning_rate": 9.99131528767502e-06, "loss": 0.6396, "step": 496 }, { "epoch": 0.048369829683698295, "grad_norm": 1.6658790952812916, "learning_rate": 9.99122218195628e-06, "loss": 0.5429, "step": 497 }, { "epoch": 0.048467153284671535, "grad_norm": 1.6568038302360257, "learning_rate": 9.991128580256725e-06, "loss": 0.4532, "step": 498 }, { "epoch": 0.04856447688564477, "grad_norm": 1.8451659374514144, "learning_rate": 9.991034482585656e-06, "loss": 0.5845, "step": 499 }, { "epoch": 0.04866180048661801, "grad_norm": 1.9103948838029656, "learning_rate": 9.99093988895242e-06, "loss": 0.5508, "step": 500 }, { "epoch": 0.04875912408759124, "grad_norm": 1.9691733858712537, "learning_rate": 9.990844799366422e-06, "loss": 0.6374, "step": 501 }, { "epoch": 0.048856447688564474, "grad_norm": 2.1278472226161846, "learning_rate": 9.990749213837108e-06, "loss": 0.572, "step": 502 }, { "epoch": 0.04895377128953771, "grad_norm": 1.9704028865885994, "learning_rate": 9.990653132373977e-06, "loss": 0.6282, "step": 503 }, { "epoch": 0.049051094890510946, "grad_norm": 1.8965741341561362, "learning_rate": 9.990556554986577e-06, "loss": 0.5749, "step": 504 }, { "epoch": 0.049148418491484186, "grad_norm": 1.5425018763105707, "learning_rate": 9.990459481684504e-06, "loss": 0.4236, "step": 505 }, { "epoch": 0.04924574209245742, "grad_norm": 1.736669998068125, "learning_rate": 9.990361912477405e-06, "loss": 0.4275, "step": 506 }, { "epoch": 0.04934306569343066, "grad_norm": 2.049335776858506, "learning_rate": 9.990263847374976e-06, "loss": 0.6897, "step": 507 }, { "epoch": 0.04944038929440389, "grad_norm": 1.8544975871268152, "learning_rate": 9.990165286386961e-06, "loss": 0.4811, "step": 508 }, { "epoch": 0.04953771289537713, "grad_norm": 1.5709178763522822, "learning_rate": 9.990066229523155e-06, "loss": 0.4585, "step": 509 }, { "epoch": 0.049635036496350364, "grad_norm": 2.1410068811754153, "learning_rate": 9.989966676793399e-06, "loss": 0.4773, "step": 510 }, { "epoch": 0.049732360097323604, "grad_norm": 1.760724042734433, "learning_rate": 9.989866628207589e-06, "loss": 0.3144, "step": 511 }, { "epoch": 0.04982968369829684, "grad_norm": 1.8521560168370175, "learning_rate": 9.989766083775662e-06, "loss": 0.4656, "step": 512 }, { "epoch": 0.049927007299270076, "grad_norm": 1.544987615640627, "learning_rate": 9.989665043507616e-06, "loss": 0.4089, "step": 513 }, { "epoch": 0.05002433090024331, "grad_norm": 1.9122960249889975, "learning_rate": 9.989563507413487e-06, "loss": 0.4535, "step": 514 }, { "epoch": 0.05012165450121654, "grad_norm": 1.5187134098621655, "learning_rate": 9.989461475503363e-06, "loss": 0.31, "step": 515 }, { "epoch": 0.05021897810218978, "grad_norm": 1.562160455050312, "learning_rate": 9.989358947787389e-06, "loss": 0.4009, "step": 516 }, { "epoch": 0.050316301703163015, "grad_norm": 1.738084966314413, "learning_rate": 9.989255924275746e-06, "loss": 0.4723, "step": 517 }, { "epoch": 0.050413625304136254, "grad_norm": 2.156580581755068, "learning_rate": 9.989152404978678e-06, "loss": 0.4407, "step": 518 }, { "epoch": 0.05051094890510949, "grad_norm": 1.8652302207700793, "learning_rate": 9.989048389906469e-06, "loss": 0.587, "step": 519 }, { "epoch": 0.05060827250608273, "grad_norm": 1.5934369396830426, "learning_rate": 9.988943879069452e-06, "loss": 0.3961, "step": 520 }, { "epoch": 0.05070559610705596, "grad_norm": 1.4294562647861604, "learning_rate": 9.988838872478017e-06, "loss": 0.3382, "step": 521 }, { "epoch": 0.0508029197080292, "grad_norm": 1.5693240874435923, "learning_rate": 9.988733370142598e-06, "loss": 0.3876, "step": 522 }, { "epoch": 0.05090024330900243, "grad_norm": 1.6720738515514542, "learning_rate": 9.988627372073678e-06, "loss": 0.448, "step": 523 }, { "epoch": 0.05099756690997567, "grad_norm": 2.0438207304961367, "learning_rate": 9.988520878281787e-06, "loss": 0.5724, "step": 524 }, { "epoch": 0.051094890510948905, "grad_norm": 2.0003463921985456, "learning_rate": 9.988413888777512e-06, "loss": 0.4506, "step": 525 }, { "epoch": 0.05119221411192214, "grad_norm": 2.11812759304704, "learning_rate": 9.988306403571482e-06, "loss": 0.757, "step": 526 }, { "epoch": 0.05128953771289538, "grad_norm": 1.5594386055307068, "learning_rate": 9.98819842267438e-06, "loss": 0.4145, "step": 527 }, { "epoch": 0.05138686131386861, "grad_norm": 1.917978943216931, "learning_rate": 9.988089946096933e-06, "loss": 0.5363, "step": 528 }, { "epoch": 0.05148418491484185, "grad_norm": 1.3212282063862113, "learning_rate": 9.987980973849924e-06, "loss": 0.3132, "step": 529 }, { "epoch": 0.05158150851581508, "grad_norm": 1.2285769982465171, "learning_rate": 9.987871505944177e-06, "loss": 0.2287, "step": 530 }, { "epoch": 0.05167883211678832, "grad_norm": 1.849610792922833, "learning_rate": 9.987761542390574e-06, "loss": 0.6487, "step": 531 }, { "epoch": 0.051776155717761556, "grad_norm": 1.158461389164102, "learning_rate": 9.987651083200044e-06, "loss": 0.2111, "step": 532 }, { "epoch": 0.051873479318734796, "grad_norm": 1.8450520976911682, "learning_rate": 9.987540128383556e-06, "loss": 0.5579, "step": 533 }, { "epoch": 0.05197080291970803, "grad_norm": 1.9047794610871986, "learning_rate": 9.98742867795214e-06, "loss": 0.4542, "step": 534 }, { "epoch": 0.05206812652068127, "grad_norm": 1.5564676952152843, "learning_rate": 9.987316731916872e-06, "loss": 0.4467, "step": 535 }, { "epoch": 0.0521654501216545, "grad_norm": 1.403952395827601, "learning_rate": 9.987204290288876e-06, "loss": 0.3761, "step": 536 }, { "epoch": 0.052262773722627734, "grad_norm": 1.948151749349848, "learning_rate": 9.987091353079323e-06, "loss": 0.5782, "step": 537 }, { "epoch": 0.052360097323600974, "grad_norm": 1.6211222818460531, "learning_rate": 9.986977920299437e-06, "loss": 0.4047, "step": 538 }, { "epoch": 0.052457420924574207, "grad_norm": 1.4911900726837217, "learning_rate": 9.986863991960491e-06, "loss": 0.3817, "step": 539 }, { "epoch": 0.052554744525547446, "grad_norm": 1.530872687739145, "learning_rate": 9.986749568073804e-06, "loss": 0.4639, "step": 540 }, { "epoch": 0.05265206812652068, "grad_norm": 1.766399180057757, "learning_rate": 9.986634648650746e-06, "loss": 0.5132, "step": 541 }, { "epoch": 0.05274939172749392, "grad_norm": 1.7318370911583716, "learning_rate": 9.98651923370274e-06, "loss": 0.5845, "step": 542 }, { "epoch": 0.05284671532846715, "grad_norm": 1.4523428175637472, "learning_rate": 9.986403323241252e-06, "loss": 0.3817, "step": 543 }, { "epoch": 0.05294403892944039, "grad_norm": 1.3085205057626972, "learning_rate": 9.9862869172778e-06, "loss": 0.294, "step": 544 }, { "epoch": 0.053041362530413624, "grad_norm": 1.749260064779093, "learning_rate": 9.986170015823953e-06, "loss": 0.3885, "step": 545 }, { "epoch": 0.053138686131386864, "grad_norm": 1.9224820302612053, "learning_rate": 9.986052618891326e-06, "loss": 0.5841, "step": 546 }, { "epoch": 0.0532360097323601, "grad_norm": 1.6019594770490224, "learning_rate": 9.985934726491587e-06, "loss": 0.5602, "step": 547 }, { "epoch": 0.05333333333333334, "grad_norm": 1.63788543125369, "learning_rate": 9.98581633863645e-06, "loss": 0.4913, "step": 548 }, { "epoch": 0.05343065693430657, "grad_norm": 1.7751230304686407, "learning_rate": 9.985697455337677e-06, "loss": 0.4575, "step": 549 }, { "epoch": 0.0535279805352798, "grad_norm": 1.4813830287768246, "learning_rate": 9.985578076607086e-06, "loss": 0.2811, "step": 550 }, { "epoch": 0.05362530413625304, "grad_norm": 1.8047180833743464, "learning_rate": 9.985458202456534e-06, "loss": 0.5564, "step": 551 }, { "epoch": 0.053722627737226275, "grad_norm": 1.4776771818705197, "learning_rate": 9.985337832897938e-06, "loss": 0.2842, "step": 552 }, { "epoch": 0.053819951338199515, "grad_norm": 1.800973083472876, "learning_rate": 9.985216967943256e-06, "loss": 0.4017, "step": 553 }, { "epoch": 0.05391727493917275, "grad_norm": 1.4167019147788764, "learning_rate": 9.985095607604502e-06, "loss": 0.2676, "step": 554 }, { "epoch": 0.05401459854014599, "grad_norm": 1.462279330828973, "learning_rate": 9.984973751893732e-06, "loss": 0.342, "step": 555 }, { "epoch": 0.05411192214111922, "grad_norm": 1.7941608662857766, "learning_rate": 9.984851400823056e-06, "loss": 0.4851, "step": 556 }, { "epoch": 0.05420924574209246, "grad_norm": 1.865163176610701, "learning_rate": 9.984728554404632e-06, "loss": 0.5938, "step": 557 }, { "epoch": 0.05430656934306569, "grad_norm": 1.9578700904261006, "learning_rate": 9.984605212650669e-06, "loss": 0.5846, "step": 558 }, { "epoch": 0.05440389294403893, "grad_norm": 1.7615345522382602, "learning_rate": 9.98448137557342e-06, "loss": 0.5517, "step": 559 }, { "epoch": 0.054501216545012166, "grad_norm": 1.7987507193579173, "learning_rate": 9.984357043185195e-06, "loss": 0.4511, "step": 560 }, { "epoch": 0.0545985401459854, "grad_norm": 1.8966136067258859, "learning_rate": 9.984232215498347e-06, "loss": 0.3339, "step": 561 }, { "epoch": 0.05469586374695864, "grad_norm": 1.760439118311743, "learning_rate": 9.98410689252528e-06, "loss": 0.4797, "step": 562 }, { "epoch": 0.05479318734793187, "grad_norm": 1.7467534741216573, "learning_rate": 9.983981074278448e-06, "loss": 0.3854, "step": 563 }, { "epoch": 0.05489051094890511, "grad_norm": 1.638747457914032, "learning_rate": 9.983854760770353e-06, "loss": 0.3215, "step": 564 }, { "epoch": 0.054987834549878344, "grad_norm": 1.565721167011275, "learning_rate": 9.983727952013546e-06, "loss": 0.3573, "step": 565 }, { "epoch": 0.05508515815085158, "grad_norm": 1.819373023432736, "learning_rate": 9.98360064802063e-06, "loss": 0.304, "step": 566 }, { "epoch": 0.055182481751824816, "grad_norm": 2.219648367380945, "learning_rate": 9.983472848804254e-06, "loss": 0.7398, "step": 567 }, { "epoch": 0.055279805352798056, "grad_norm": 1.7935096739228122, "learning_rate": 9.98334455437712e-06, "loss": 0.3257, "step": 568 }, { "epoch": 0.05537712895377129, "grad_norm": 2.085379879601924, "learning_rate": 9.983215764751971e-06, "loss": 0.3477, "step": 569 }, { "epoch": 0.05547445255474453, "grad_norm": 1.528881264990704, "learning_rate": 9.98308647994161e-06, "loss": 0.4173, "step": 570 }, { "epoch": 0.05557177615571776, "grad_norm": 1.282510416609492, "learning_rate": 9.982956699958883e-06, "loss": 0.3513, "step": 571 }, { "epoch": 0.055669099756690994, "grad_norm": 1.6035600811723405, "learning_rate": 9.982826424816688e-06, "loss": 0.3318, "step": 572 }, { "epoch": 0.055766423357664234, "grad_norm": 1.9455996381881653, "learning_rate": 9.982695654527966e-06, "loss": 0.4991, "step": 573 }, { "epoch": 0.05586374695863747, "grad_norm": 1.8397262762514839, "learning_rate": 9.982564389105714e-06, "loss": 0.345, "step": 574 }, { "epoch": 0.05596107055961071, "grad_norm": 1.7997461351876956, "learning_rate": 9.982432628562978e-06, "loss": 0.5384, "step": 575 }, { "epoch": 0.05605839416058394, "grad_norm": 1.6246101205121968, "learning_rate": 9.982300372912848e-06, "loss": 0.5499, "step": 576 }, { "epoch": 0.05615571776155718, "grad_norm": 1.9184631207748861, "learning_rate": 9.982167622168467e-06, "loss": 0.449, "step": 577 }, { "epoch": 0.05625304136253041, "grad_norm": 1.5368079698239796, "learning_rate": 9.982034376343029e-06, "loss": 0.3311, "step": 578 }, { "epoch": 0.05635036496350365, "grad_norm": 1.9061539422519105, "learning_rate": 9.98190063544977e-06, "loss": 0.4182, "step": 579 }, { "epoch": 0.056447688564476885, "grad_norm": 1.6727227174184238, "learning_rate": 9.981766399501984e-06, "loss": 0.482, "step": 580 }, { "epoch": 0.056545012165450124, "grad_norm": 1.8546055763617424, "learning_rate": 9.98163166851301e-06, "loss": 0.5758, "step": 581 }, { "epoch": 0.05664233576642336, "grad_norm": 2.0350303098403706, "learning_rate": 9.981496442496234e-06, "loss": 0.5236, "step": 582 }, { "epoch": 0.0567396593673966, "grad_norm": 1.3907379790284926, "learning_rate": 9.981360721465095e-06, "loss": 0.3375, "step": 583 }, { "epoch": 0.05683698296836983, "grad_norm": 2.0168702766261486, "learning_rate": 9.98122450543308e-06, "loss": 0.595, "step": 584 }, { "epoch": 0.05693430656934306, "grad_norm": 1.7248754760467295, "learning_rate": 9.981087794413722e-06, "loss": 0.3747, "step": 585 }, { "epoch": 0.0570316301703163, "grad_norm": 1.8918865818240052, "learning_rate": 9.98095058842061e-06, "loss": 0.5805, "step": 586 }, { "epoch": 0.057128953771289535, "grad_norm": 1.8691153689026438, "learning_rate": 9.980812887467377e-06, "loss": 0.3451, "step": 587 }, { "epoch": 0.057226277372262775, "grad_norm": 1.7475224395533677, "learning_rate": 9.980674691567705e-06, "loss": 0.2789, "step": 588 }, { "epoch": 0.05732360097323601, "grad_norm": 1.876124489873064, "learning_rate": 9.980536000735328e-06, "loss": 0.5917, "step": 589 }, { "epoch": 0.05742092457420925, "grad_norm": 1.6438847446693803, "learning_rate": 9.980396814984025e-06, "loss": 0.3063, "step": 590 }, { "epoch": 0.05751824817518248, "grad_norm": 1.7609146888426583, "learning_rate": 9.980257134327634e-06, "loss": 0.4177, "step": 591 }, { "epoch": 0.05761557177615572, "grad_norm": 3.1047413099950445, "learning_rate": 9.980116958780027e-06, "loss": 0.2793, "step": 592 }, { "epoch": 0.05771289537712895, "grad_norm": 1.3365913263494138, "learning_rate": 9.979976288355137e-06, "loss": 0.2754, "step": 593 }, { "epoch": 0.05781021897810219, "grad_norm": 1.7378721977452198, "learning_rate": 9.979835123066943e-06, "loss": 0.4156, "step": 594 }, { "epoch": 0.057907542579075426, "grad_norm": 1.7652517953930271, "learning_rate": 9.979693462929472e-06, "loss": 0.3768, "step": 595 }, { "epoch": 0.05800486618004866, "grad_norm": 2.4155692425963675, "learning_rate": 9.979551307956801e-06, "loss": 0.6409, "step": 596 }, { "epoch": 0.0581021897810219, "grad_norm": 2.2339995809091913, "learning_rate": 9.979408658163055e-06, "loss": 0.3134, "step": 597 }, { "epoch": 0.05819951338199513, "grad_norm": 1.9788468018769068, "learning_rate": 9.97926551356241e-06, "loss": 0.2509, "step": 598 }, { "epoch": 0.05829683698296837, "grad_norm": 4.0668515887714385, "learning_rate": 9.979121874169091e-06, "loss": 0.3322, "step": 599 }, { "epoch": 0.058394160583941604, "grad_norm": 2.0552497355613264, "learning_rate": 9.97897773999737e-06, "loss": 0.2732, "step": 600 }, { "epoch": 0.058491484184914844, "grad_norm": 1.7372746328291984, "learning_rate": 9.978833111061573e-06, "loss": 0.3021, "step": 601 }, { "epoch": 0.058588807785888077, "grad_norm": 1.8426989129405926, "learning_rate": 9.978687987376067e-06, "loss": 0.3147, "step": 602 }, { "epoch": 0.058686131386861316, "grad_norm": 1.456816302033054, "learning_rate": 9.978542368955278e-06, "loss": 0.3669, "step": 603 }, { "epoch": 0.05878345498783455, "grad_norm": 2.1398878847147973, "learning_rate": 9.978396255813672e-06, "loss": 0.457, "step": 604 }, { "epoch": 0.05888077858880779, "grad_norm": 1.860652260495742, "learning_rate": 9.978249647965769e-06, "loss": 0.5567, "step": 605 }, { "epoch": 0.05897810218978102, "grad_norm": 1.7559525207322428, "learning_rate": 9.97810254542614e-06, "loss": 0.439, "step": 606 }, { "epoch": 0.059075425790754255, "grad_norm": 1.4912680944094816, "learning_rate": 9.977954948209402e-06, "loss": 0.4431, "step": 607 }, { "epoch": 0.059172749391727494, "grad_norm": 1.766690700595448, "learning_rate": 9.97780685633022e-06, "loss": 0.3187, "step": 608 }, { "epoch": 0.05927007299270073, "grad_norm": 2.169180646458804, "learning_rate": 9.977658269803312e-06, "loss": 0.5042, "step": 609 }, { "epoch": 0.05936739659367397, "grad_norm": 1.623119439845207, "learning_rate": 9.977509188643441e-06, "loss": 0.3632, "step": 610 }, { "epoch": 0.0594647201946472, "grad_norm": 2.0976883017366226, "learning_rate": 9.977359612865424e-06, "loss": 0.6465, "step": 611 }, { "epoch": 0.05956204379562044, "grad_norm": 1.59126192242755, "learning_rate": 9.977209542484123e-06, "loss": 0.4335, "step": 612 }, { "epoch": 0.05965936739659367, "grad_norm": 1.6532378246551842, "learning_rate": 9.97705897751445e-06, "loss": 0.3462, "step": 613 }, { "epoch": 0.05975669099756691, "grad_norm": 1.6478059833585124, "learning_rate": 9.976907917971365e-06, "loss": 0.4063, "step": 614 }, { "epoch": 0.059854014598540145, "grad_norm": 1.750559308727237, "learning_rate": 9.976756363869884e-06, "loss": 0.5062, "step": 615 }, { "epoch": 0.059951338199513385, "grad_norm": 1.6400113365898012, "learning_rate": 9.976604315225063e-06, "loss": 0.3699, "step": 616 }, { "epoch": 0.06004866180048662, "grad_norm": 1.5449283565959169, "learning_rate": 9.976451772052013e-06, "loss": 0.3635, "step": 617 }, { "epoch": 0.06014598540145986, "grad_norm": 1.3799772345005799, "learning_rate": 9.97629873436589e-06, "loss": 0.2747, "step": 618 }, { "epoch": 0.06024330900243309, "grad_norm": 1.9454941262632244, "learning_rate": 9.976145202181905e-06, "loss": 0.4963, "step": 619 }, { "epoch": 0.06034063260340632, "grad_norm": 1.5274916477255973, "learning_rate": 9.975991175515311e-06, "loss": 0.3348, "step": 620 }, { "epoch": 0.06043795620437956, "grad_norm": 1.9623540496009142, "learning_rate": 9.975836654381416e-06, "loss": 0.5373, "step": 621 }, { "epoch": 0.060535279805352796, "grad_norm": 1.4248144765181632, "learning_rate": 9.975681638795575e-06, "loss": 0.3137, "step": 622 }, { "epoch": 0.060632603406326036, "grad_norm": 1.4366236793713136, "learning_rate": 9.975526128773192e-06, "loss": 0.3519, "step": 623 }, { "epoch": 0.06072992700729927, "grad_norm": 1.8458441140553945, "learning_rate": 9.97537012432972e-06, "loss": 0.3937, "step": 624 }, { "epoch": 0.06082725060827251, "grad_norm": 1.868271580826056, "learning_rate": 9.975213625480658e-06, "loss": 0.4567, "step": 625 }, { "epoch": 0.06092457420924574, "grad_norm": 2.4613001964869223, "learning_rate": 9.97505663224156e-06, "loss": 0.5607, "step": 626 }, { "epoch": 0.06102189781021898, "grad_norm": 1.6709839772769468, "learning_rate": 9.974899144628027e-06, "loss": 0.3233, "step": 627 }, { "epoch": 0.061119221411192214, "grad_norm": 1.8046591620263965, "learning_rate": 9.97474116265571e-06, "loss": 0.3929, "step": 628 }, { "epoch": 0.06121654501216545, "grad_norm": 1.7182161369033975, "learning_rate": 9.974582686340304e-06, "loss": 0.3804, "step": 629 }, { "epoch": 0.061313868613138686, "grad_norm": 2.435940855169524, "learning_rate": 9.974423715697558e-06, "loss": 0.7453, "step": 630 }, { "epoch": 0.06141119221411192, "grad_norm": 1.401143104634322, "learning_rate": 9.974264250743272e-06, "loss": 0.306, "step": 631 }, { "epoch": 0.06150851581508516, "grad_norm": 1.540550326071636, "learning_rate": 9.97410429149329e-06, "loss": 0.3582, "step": 632 }, { "epoch": 0.06160583941605839, "grad_norm": 4.038520112503673, "learning_rate": 9.973943837963507e-06, "loss": 0.2688, "step": 633 }, { "epoch": 0.06170316301703163, "grad_norm": 2.032927304778425, "learning_rate": 9.973782890169867e-06, "loss": 0.6952, "step": 634 }, { "epoch": 0.061800486618004864, "grad_norm": 1.5242884680104736, "learning_rate": 9.973621448128364e-06, "loss": 0.3957, "step": 635 }, { "epoch": 0.061897810218978104, "grad_norm": 1.599953340803732, "learning_rate": 9.973459511855042e-06, "loss": 0.3783, "step": 636 }, { "epoch": 0.06199513381995134, "grad_norm": 2.1886899708740697, "learning_rate": 9.973297081365988e-06, "loss": 0.5426, "step": 637 }, { "epoch": 0.06209245742092458, "grad_norm": 1.363421719809718, "learning_rate": 9.973134156677349e-06, "loss": 0.2707, "step": 638 }, { "epoch": 0.06218978102189781, "grad_norm": 1.883218491971664, "learning_rate": 9.972970737805312e-06, "loss": 0.543, "step": 639 }, { "epoch": 0.06228710462287105, "grad_norm": 1.6336178778276322, "learning_rate": 9.972806824766117e-06, "loss": 0.4833, "step": 640 }, { "epoch": 0.06238442822384428, "grad_norm": 1.74145478719615, "learning_rate": 9.972642417576049e-06, "loss": 0.5456, "step": 641 }, { "epoch": 0.062481751824817515, "grad_norm": 1.3939447959630629, "learning_rate": 9.972477516251448e-06, "loss": 0.2935, "step": 642 }, { "epoch": 0.06257907542579075, "grad_norm": 1.9741261661680443, "learning_rate": 9.9723121208087e-06, "loss": 0.4377, "step": 643 }, { "epoch": 0.06267639902676399, "grad_norm": 2.214700253529172, "learning_rate": 9.972146231264242e-06, "loss": 0.6711, "step": 644 }, { "epoch": 0.06277372262773723, "grad_norm": 1.7399845992974294, "learning_rate": 9.971979847634554e-06, "loss": 0.5327, "step": 645 }, { "epoch": 0.06287104622871047, "grad_norm": 1.3552365502663122, "learning_rate": 9.971812969936174e-06, "loss": 0.3553, "step": 646 }, { "epoch": 0.06296836982968369, "grad_norm": 1.8378075997453163, "learning_rate": 9.971645598185685e-06, "loss": 0.3709, "step": 647 }, { "epoch": 0.06306569343065693, "grad_norm": 1.7441350204189767, "learning_rate": 9.971477732399714e-06, "loss": 0.489, "step": 648 }, { "epoch": 0.06316301703163017, "grad_norm": 2.083031963167252, "learning_rate": 9.971309372594947e-06, "loss": 0.6196, "step": 649 }, { "epoch": 0.06326034063260341, "grad_norm": 1.5678236487001533, "learning_rate": 9.971140518788112e-06, "loss": 0.3202, "step": 650 }, { "epoch": 0.06335766423357664, "grad_norm": 1.7281008810115812, "learning_rate": 9.970971170995988e-06, "loss": 0.4169, "step": 651 }, { "epoch": 0.06345498783454988, "grad_norm": 1.5626981990993993, "learning_rate": 9.970801329235402e-06, "loss": 0.4238, "step": 652 }, { "epoch": 0.06355231143552312, "grad_norm": 1.5338214380715702, "learning_rate": 9.970630993523234e-06, "loss": 0.278, "step": 653 }, { "epoch": 0.06364963503649634, "grad_norm": 1.7806299033721755, "learning_rate": 9.970460163876409e-06, "loss": 0.5649, "step": 654 }, { "epoch": 0.06374695863746958, "grad_norm": 1.9349681554929028, "learning_rate": 9.9702888403119e-06, "loss": 0.3297, "step": 655 }, { "epoch": 0.06384428223844282, "grad_norm": 1.4947723050696704, "learning_rate": 9.970117022846736e-06, "loss": 0.4077, "step": 656 }, { "epoch": 0.06394160583941606, "grad_norm": 1.5696774237596223, "learning_rate": 9.96994471149799e-06, "loss": 0.4681, "step": 657 }, { "epoch": 0.06403892944038929, "grad_norm": 1.7662095984112474, "learning_rate": 9.969771906282781e-06, "loss": 0.539, "step": 658 }, { "epoch": 0.06413625304136253, "grad_norm": 2.926336951253308, "learning_rate": 9.969598607218285e-06, "loss": 0.4196, "step": 659 }, { "epoch": 0.06423357664233577, "grad_norm": 3.148192138198314, "learning_rate": 9.96942481432172e-06, "loss": 0.4827, "step": 660 }, { "epoch": 0.06433090024330901, "grad_norm": 1.790436662552377, "learning_rate": 9.969250527610356e-06, "loss": 0.4972, "step": 661 }, { "epoch": 0.06442822384428223, "grad_norm": 1.4712739725679773, "learning_rate": 9.969075747101514e-06, "loss": 0.4112, "step": 662 }, { "epoch": 0.06452554744525547, "grad_norm": 1.4521996617982842, "learning_rate": 9.96890047281256e-06, "loss": 0.3729, "step": 663 }, { "epoch": 0.06462287104622871, "grad_norm": 1.5457088814513262, "learning_rate": 9.96872470476091e-06, "loss": 0.4294, "step": 664 }, { "epoch": 0.06472019464720194, "grad_norm": 1.7644033340951866, "learning_rate": 9.968548442964034e-06, "loss": 0.4487, "step": 665 }, { "epoch": 0.06481751824817518, "grad_norm": 1.632555708701406, "learning_rate": 9.968371687439446e-06, "loss": 0.3929, "step": 666 }, { "epoch": 0.06491484184914842, "grad_norm": 1.8990302396780172, "learning_rate": 9.968194438204708e-06, "loss": 0.4101, "step": 667 }, { "epoch": 0.06501216545012166, "grad_norm": 2.092762728551112, "learning_rate": 9.968016695277436e-06, "loss": 0.5712, "step": 668 }, { "epoch": 0.06510948905109488, "grad_norm": 1.5876668887386824, "learning_rate": 9.967838458675292e-06, "loss": 0.494, "step": 669 }, { "epoch": 0.06520681265206812, "grad_norm": 1.7536517597940893, "learning_rate": 9.967659728415985e-06, "loss": 0.6121, "step": 670 }, { "epoch": 0.06530413625304136, "grad_norm": 1.9021294255711243, "learning_rate": 9.96748050451728e-06, "loss": 0.3634, "step": 671 }, { "epoch": 0.0654014598540146, "grad_norm": 1.4457078547633553, "learning_rate": 9.96730078699698e-06, "loss": 0.4586, "step": 672 }, { "epoch": 0.06549878345498783, "grad_norm": 1.6474950184261972, "learning_rate": 9.967120575872952e-06, "loss": 0.5028, "step": 673 }, { "epoch": 0.06559610705596107, "grad_norm": 1.9901979572232373, "learning_rate": 9.966939871163098e-06, "loss": 0.6986, "step": 674 }, { "epoch": 0.06569343065693431, "grad_norm": 1.3671458210722949, "learning_rate": 9.966758672885375e-06, "loss": 0.3945, "step": 675 }, { "epoch": 0.06579075425790755, "grad_norm": 1.8371332697903162, "learning_rate": 9.96657698105779e-06, "loss": 0.6782, "step": 676 }, { "epoch": 0.06588807785888078, "grad_norm": 1.1955013749239556, "learning_rate": 9.966394795698397e-06, "loss": 0.242, "step": 677 }, { "epoch": 0.06598540145985402, "grad_norm": 1.5330975344313047, "learning_rate": 9.966212116825302e-06, "loss": 0.4351, "step": 678 }, { "epoch": 0.06608272506082725, "grad_norm": 1.539581985713935, "learning_rate": 9.966028944456657e-06, "loss": 0.3512, "step": 679 }, { "epoch": 0.06618004866180048, "grad_norm": 1.9573455375443363, "learning_rate": 9.965845278610661e-06, "loss": 0.4859, "step": 680 }, { "epoch": 0.06627737226277372, "grad_norm": 1.8387055004344444, "learning_rate": 9.96566111930557e-06, "loss": 0.3831, "step": 681 }, { "epoch": 0.06637469586374696, "grad_norm": 1.7056154014174738, "learning_rate": 9.96547646655968e-06, "loss": 0.4675, "step": 682 }, { "epoch": 0.0664720194647202, "grad_norm": 1.881602931580563, "learning_rate": 9.965291320391342e-06, "loss": 0.5955, "step": 683 }, { "epoch": 0.06656934306569343, "grad_norm": 2.9885065529853416, "learning_rate": 9.965105680818955e-06, "loss": 0.393, "step": 684 }, { "epoch": 0.06666666666666667, "grad_norm": 1.7363492709096229, "learning_rate": 9.964919547860963e-06, "loss": 0.4903, "step": 685 }, { "epoch": 0.0667639902676399, "grad_norm": 1.8182376939684146, "learning_rate": 9.964732921535863e-06, "loss": 0.5443, "step": 686 }, { "epoch": 0.06686131386861315, "grad_norm": 1.6914779965026407, "learning_rate": 9.964545801862202e-06, "loss": 0.5119, "step": 687 }, { "epoch": 0.06695863746958637, "grad_norm": 1.2736843314571082, "learning_rate": 9.964358188858573e-06, "loss": 0.2495, "step": 688 }, { "epoch": 0.06705596107055961, "grad_norm": 1.5831736266585599, "learning_rate": 9.96417008254362e-06, "loss": 0.4489, "step": 689 }, { "epoch": 0.06715328467153285, "grad_norm": 2.2148297560046224, "learning_rate": 9.963981482936034e-06, "loss": 0.5415, "step": 690 }, { "epoch": 0.06725060827250608, "grad_norm": 1.5025934211262992, "learning_rate": 9.963792390054558e-06, "loss": 0.3903, "step": 691 }, { "epoch": 0.06734793187347932, "grad_norm": 1.4602374679322867, "learning_rate": 9.96360280391798e-06, "loss": 0.3199, "step": 692 }, { "epoch": 0.06744525547445256, "grad_norm": 1.5813416284844282, "learning_rate": 9.963412724545142e-06, "loss": 0.3213, "step": 693 }, { "epoch": 0.0675425790754258, "grad_norm": 1.246883512769049, "learning_rate": 9.96322215195493e-06, "loss": 0.2644, "step": 694 }, { "epoch": 0.06763990267639902, "grad_norm": 1.7094335347253355, "learning_rate": 9.963031086166282e-06, "loss": 0.4761, "step": 695 }, { "epoch": 0.06773722627737226, "grad_norm": 1.6516611118524773, "learning_rate": 9.962839527198184e-06, "loss": 0.4823, "step": 696 }, { "epoch": 0.0678345498783455, "grad_norm": 1.3531669839243998, "learning_rate": 9.962647475069672e-06, "loss": 0.4272, "step": 697 }, { "epoch": 0.06793187347931874, "grad_norm": 1.9430916606586504, "learning_rate": 9.962454929799829e-06, "loss": 0.5776, "step": 698 }, { "epoch": 0.06802919708029197, "grad_norm": 1.8772536403383466, "learning_rate": 9.962261891407792e-06, "loss": 0.6338, "step": 699 }, { "epoch": 0.0681265206812652, "grad_norm": 1.3972932620324034, "learning_rate": 9.96206835991274e-06, "loss": 0.3671, "step": 700 }, { "epoch": 0.06822384428223845, "grad_norm": 1.287329601381866, "learning_rate": 9.961874335333904e-06, "loss": 0.2744, "step": 701 }, { "epoch": 0.06832116788321167, "grad_norm": 1.5600519457751545, "learning_rate": 9.961679817690566e-06, "loss": 0.4433, "step": 702 }, { "epoch": 0.06841849148418491, "grad_norm": 1.3898736874388666, "learning_rate": 9.961484807002056e-06, "loss": 0.4197, "step": 703 }, { "epoch": 0.06851581508515815, "grad_norm": 1.672202746628868, "learning_rate": 9.961289303287749e-06, "loss": 0.4601, "step": 704 }, { "epoch": 0.06861313868613139, "grad_norm": 1.7427655274680753, "learning_rate": 9.961093306567076e-06, "loss": 0.5845, "step": 705 }, { "epoch": 0.06871046228710462, "grad_norm": 1.794570108008766, "learning_rate": 9.960896816859512e-06, "loss": 0.3459, "step": 706 }, { "epoch": 0.06880778588807786, "grad_norm": 1.6024314197975584, "learning_rate": 9.960699834184582e-06, "loss": 0.4441, "step": 707 }, { "epoch": 0.0689051094890511, "grad_norm": 1.619306935418848, "learning_rate": 9.960502358561858e-06, "loss": 0.4647, "step": 708 }, { "epoch": 0.06900243309002434, "grad_norm": 1.5009190604836247, "learning_rate": 9.960304390010968e-06, "loss": 0.373, "step": 709 }, { "epoch": 0.06909975669099756, "grad_norm": 1.8613999824223078, "learning_rate": 9.960105928551583e-06, "loss": 0.3926, "step": 710 }, { "epoch": 0.0691970802919708, "grad_norm": 2.8907340364253757, "learning_rate": 9.959906974203422e-06, "loss": 0.5451, "step": 711 }, { "epoch": 0.06929440389294404, "grad_norm": 1.826374356881247, "learning_rate": 9.959707526986256e-06, "loss": 0.4341, "step": 712 }, { "epoch": 0.06939172749391727, "grad_norm": 2.5001373253299133, "learning_rate": 9.959507586919903e-06, "loss": 0.6643, "step": 713 }, { "epoch": 0.06948905109489051, "grad_norm": 1.769427365923108, "learning_rate": 9.959307154024234e-06, "loss": 0.5431, "step": 714 }, { "epoch": 0.06958637469586375, "grad_norm": 2.3285358245695322, "learning_rate": 9.959106228319166e-06, "loss": 0.5274, "step": 715 }, { "epoch": 0.06968369829683699, "grad_norm": 1.4070234926508725, "learning_rate": 9.958904809824663e-06, "loss": 0.3257, "step": 716 }, { "epoch": 0.06978102189781021, "grad_norm": 1.9284568290872997, "learning_rate": 9.958702898560742e-06, "loss": 0.5648, "step": 717 }, { "epoch": 0.06987834549878345, "grad_norm": 2.092543866644565, "learning_rate": 9.958500494547465e-06, "loss": 0.6256, "step": 718 }, { "epoch": 0.0699756690997567, "grad_norm": 1.5948763588365042, "learning_rate": 9.958297597804947e-06, "loss": 0.4011, "step": 719 }, { "epoch": 0.07007299270072993, "grad_norm": 1.2246362905267065, "learning_rate": 9.958094208353348e-06, "loss": 0.2444, "step": 720 }, { "epoch": 0.07017031630170316, "grad_norm": 1.2302916868666773, "learning_rate": 9.95789032621288e-06, "loss": 0.3191, "step": 721 }, { "epoch": 0.0702676399026764, "grad_norm": 1.5504396768673763, "learning_rate": 9.957685951403803e-06, "loss": 0.3112, "step": 722 }, { "epoch": 0.07036496350364964, "grad_norm": 2.1205819146422438, "learning_rate": 9.957481083946427e-06, "loss": 0.3453, "step": 723 }, { "epoch": 0.07046228710462286, "grad_norm": 2.048519725880563, "learning_rate": 9.957275723861108e-06, "loss": 0.5266, "step": 724 }, { "epoch": 0.0705596107055961, "grad_norm": 1.4453693275620771, "learning_rate": 9.957069871168253e-06, "loss": 0.3082, "step": 725 }, { "epoch": 0.07065693430656934, "grad_norm": 1.8824931146868138, "learning_rate": 9.956863525888318e-06, "loss": 0.588, "step": 726 }, { "epoch": 0.07075425790754258, "grad_norm": 1.6143333569692804, "learning_rate": 9.956656688041807e-06, "loss": 0.4126, "step": 727 }, { "epoch": 0.07085158150851581, "grad_norm": 1.7905307392122496, "learning_rate": 9.956449357649276e-06, "loss": 0.521, "step": 728 }, { "epoch": 0.07094890510948905, "grad_norm": 1.3295021098228834, "learning_rate": 9.956241534731325e-06, "loss": 0.31, "step": 729 }, { "epoch": 0.07104622871046229, "grad_norm": 1.5783278835300563, "learning_rate": 9.956033219308607e-06, "loss": 0.3091, "step": 730 }, { "epoch": 0.07114355231143553, "grad_norm": 1.9905003004076265, "learning_rate": 9.955824411401822e-06, "loss": 0.3843, "step": 731 }, { "epoch": 0.07124087591240875, "grad_norm": 1.7644558301646922, "learning_rate": 9.955615111031717e-06, "loss": 0.4288, "step": 732 }, { "epoch": 0.071338199513382, "grad_norm": 1.5922207695027908, "learning_rate": 9.955405318219096e-06, "loss": 0.4767, "step": 733 }, { "epoch": 0.07143552311435523, "grad_norm": 1.7054240956141933, "learning_rate": 9.955195032984798e-06, "loss": 0.4082, "step": 734 }, { "epoch": 0.07153284671532846, "grad_norm": 1.3954970063738148, "learning_rate": 9.954984255349729e-06, "loss": 0.318, "step": 735 }, { "epoch": 0.0716301703163017, "grad_norm": 1.7287069268697828, "learning_rate": 9.954772985334825e-06, "loss": 0.4998, "step": 736 }, { "epoch": 0.07172749391727494, "grad_norm": 1.4535895804720915, "learning_rate": 9.954561222961086e-06, "loss": 0.2489, "step": 737 }, { "epoch": 0.07182481751824818, "grad_norm": 1.7113518757446542, "learning_rate": 9.954348968249552e-06, "loss": 0.4578, "step": 738 }, { "epoch": 0.0719221411192214, "grad_norm": 1.6741613993254088, "learning_rate": 9.954136221221316e-06, "loss": 0.4907, "step": 739 }, { "epoch": 0.07201946472019465, "grad_norm": 1.590982465166657, "learning_rate": 9.95392298189752e-06, "loss": 0.4116, "step": 740 }, { "epoch": 0.07211678832116789, "grad_norm": 1.422974716648181, "learning_rate": 9.953709250299351e-06, "loss": 0.3501, "step": 741 }, { "epoch": 0.07221411192214112, "grad_norm": 1.8424007198547667, "learning_rate": 9.953495026448048e-06, "loss": 0.5647, "step": 742 }, { "epoch": 0.07231143552311435, "grad_norm": 1.6572484299897867, "learning_rate": 9.953280310364902e-06, "loss": 0.3937, "step": 743 }, { "epoch": 0.07240875912408759, "grad_norm": 1.6027770112754065, "learning_rate": 9.953065102071245e-06, "loss": 0.3845, "step": 744 }, { "epoch": 0.07250608272506083, "grad_norm": 1.3618658637431431, "learning_rate": 9.952849401588464e-06, "loss": 0.3946, "step": 745 }, { "epoch": 0.07260340632603407, "grad_norm": 1.63075572158439, "learning_rate": 9.952633208937997e-06, "loss": 0.4506, "step": 746 }, { "epoch": 0.0727007299270073, "grad_norm": 1.483187632244976, "learning_rate": 9.95241652414132e-06, "loss": 0.3908, "step": 747 }, { "epoch": 0.07279805352798054, "grad_norm": 2.147960263046311, "learning_rate": 9.952199347219972e-06, "loss": 0.5249, "step": 748 }, { "epoch": 0.07289537712895378, "grad_norm": 1.5046941105429004, "learning_rate": 9.951981678195529e-06, "loss": 0.3592, "step": 749 }, { "epoch": 0.072992700729927, "grad_norm": 1.1457618113072725, "learning_rate": 9.951763517089624e-06, "loss": 0.2197, "step": 750 }, { "epoch": 0.07309002433090024, "grad_norm": 1.9275946136488011, "learning_rate": 9.951544863923934e-06, "loss": 0.5692, "step": 751 }, { "epoch": 0.07318734793187348, "grad_norm": 1.9590929330277462, "learning_rate": 9.95132571872019e-06, "loss": 0.7243, "step": 752 }, { "epoch": 0.07328467153284672, "grad_norm": 2.1368780826391283, "learning_rate": 9.951106081500162e-06, "loss": 0.7601, "step": 753 }, { "epoch": 0.07338199513381995, "grad_norm": 2.0085695969306396, "learning_rate": 9.950885952285682e-06, "loss": 0.5541, "step": 754 }, { "epoch": 0.07347931873479319, "grad_norm": 1.9283983503616706, "learning_rate": 9.950665331098622e-06, "loss": 0.3832, "step": 755 }, { "epoch": 0.07357664233576643, "grad_norm": 1.4173732379297153, "learning_rate": 9.950444217960902e-06, "loss": 0.379, "step": 756 }, { "epoch": 0.07367396593673967, "grad_norm": 1.5015176407129935, "learning_rate": 9.9502226128945e-06, "loss": 0.4696, "step": 757 }, { "epoch": 0.07377128953771289, "grad_norm": 1.6746905852394565, "learning_rate": 9.950000515921434e-06, "loss": 0.2984, "step": 758 }, { "epoch": 0.07386861313868613, "grad_norm": 1.4429847737048944, "learning_rate": 9.949777927063776e-06, "loss": 0.3748, "step": 759 }, { "epoch": 0.07396593673965937, "grad_norm": 1.1895632638034424, "learning_rate": 9.94955484634364e-06, "loss": 0.3014, "step": 760 }, { "epoch": 0.0740632603406326, "grad_norm": 1.5497241513071458, "learning_rate": 9.949331273783198e-06, "loss": 0.5458, "step": 761 }, { "epoch": 0.07416058394160584, "grad_norm": 1.5531214201672936, "learning_rate": 9.949107209404664e-06, "loss": 0.4575, "step": 762 }, { "epoch": 0.07425790754257908, "grad_norm": 1.3336107839559097, "learning_rate": 9.948882653230306e-06, "loss": 0.4227, "step": 763 }, { "epoch": 0.07435523114355232, "grad_norm": 1.7418209768074853, "learning_rate": 9.948657605282437e-06, "loss": 0.659, "step": 764 }, { "epoch": 0.07445255474452554, "grad_norm": 1.462439433090815, "learning_rate": 9.94843206558342e-06, "loss": 0.445, "step": 765 }, { "epoch": 0.07454987834549878, "grad_norm": 1.0856086178050317, "learning_rate": 9.948206034155666e-06, "loss": 0.2245, "step": 766 }, { "epoch": 0.07464720194647202, "grad_norm": 1.458503858496447, "learning_rate": 9.947979511021638e-06, "loss": 0.3009, "step": 767 }, { "epoch": 0.07474452554744526, "grad_norm": 1.1921292471996519, "learning_rate": 9.947752496203844e-06, "loss": 0.2988, "step": 768 }, { "epoch": 0.07484184914841849, "grad_norm": 1.6693024138876786, "learning_rate": 9.947524989724844e-06, "loss": 0.4783, "step": 769 }, { "epoch": 0.07493917274939173, "grad_norm": 1.4928671202909605, "learning_rate": 9.947296991607244e-06, "loss": 0.4161, "step": 770 }, { "epoch": 0.07503649635036497, "grad_norm": 1.4549005796935413, "learning_rate": 9.947068501873702e-06, "loss": 0.4186, "step": 771 }, { "epoch": 0.0751338199513382, "grad_norm": 1.7544781744298734, "learning_rate": 9.946839520546923e-06, "loss": 0.5593, "step": 772 }, { "epoch": 0.07523114355231143, "grad_norm": 1.561541454027553, "learning_rate": 9.946610047649659e-06, "loss": 0.5097, "step": 773 }, { "epoch": 0.07532846715328467, "grad_norm": 1.598616630831168, "learning_rate": 9.946380083204714e-06, "loss": 0.3744, "step": 774 }, { "epoch": 0.07542579075425791, "grad_norm": 1.6915556597188157, "learning_rate": 9.94614962723494e-06, "loss": 0.439, "step": 775 }, { "epoch": 0.07552311435523114, "grad_norm": 1.220024420697048, "learning_rate": 9.945918679763237e-06, "loss": 0.2339, "step": 776 }, { "epoch": 0.07562043795620438, "grad_norm": 1.6061445238682988, "learning_rate": 9.945687240812556e-06, "loss": 0.4493, "step": 777 }, { "epoch": 0.07571776155717762, "grad_norm": 1.400813806243779, "learning_rate": 9.945455310405895e-06, "loss": 0.4513, "step": 778 }, { "epoch": 0.07581508515815086, "grad_norm": 1.753751480308555, "learning_rate": 9.945222888566298e-06, "loss": 0.5379, "step": 779 }, { "epoch": 0.07591240875912408, "grad_norm": 1.4421667558329163, "learning_rate": 9.944989975316862e-06, "loss": 0.4118, "step": 780 }, { "epoch": 0.07600973236009732, "grad_norm": 1.4411974086247974, "learning_rate": 9.944756570680733e-06, "loss": 0.3295, "step": 781 }, { "epoch": 0.07610705596107056, "grad_norm": 1.5545586767450623, "learning_rate": 9.944522674681107e-06, "loss": 0.4146, "step": 782 }, { "epoch": 0.07620437956204379, "grad_norm": 2.0019900434858084, "learning_rate": 9.944288287341222e-06, "loss": 0.4945, "step": 783 }, { "epoch": 0.07630170316301703, "grad_norm": 1.5834930071710975, "learning_rate": 9.944053408684371e-06, "loss": 0.3781, "step": 784 }, { "epoch": 0.07639902676399027, "grad_norm": 1.5272521164667598, "learning_rate": 9.943818038733894e-06, "loss": 0.3865, "step": 785 }, { "epoch": 0.07649635036496351, "grad_norm": 1.8005925077547513, "learning_rate": 9.94358217751318e-06, "loss": 0.3951, "step": 786 }, { "epoch": 0.07659367396593673, "grad_norm": 2.0471085276865995, "learning_rate": 9.943345825045664e-06, "loss": 0.6391, "step": 787 }, { "epoch": 0.07669099756690997, "grad_norm": 1.7893386028077656, "learning_rate": 9.943108981354839e-06, "loss": 0.6373, "step": 788 }, { "epoch": 0.07678832116788321, "grad_norm": 1.6529186502183046, "learning_rate": 9.942871646464234e-06, "loss": 0.4901, "step": 789 }, { "epoch": 0.07688564476885645, "grad_norm": 1.8449837387732961, "learning_rate": 9.942633820397436e-06, "loss": 0.4444, "step": 790 }, { "epoch": 0.07698296836982968, "grad_norm": 1.5278738521461448, "learning_rate": 9.942395503178077e-06, "loss": 0.3701, "step": 791 }, { "epoch": 0.07708029197080292, "grad_norm": 1.8197808533034088, "learning_rate": 9.942156694829838e-06, "loss": 0.6142, "step": 792 }, { "epoch": 0.07717761557177616, "grad_norm": 1.8496691201700692, "learning_rate": 9.941917395376452e-06, "loss": 0.2021, "step": 793 }, { "epoch": 0.07727493917274939, "grad_norm": 1.8762664332677217, "learning_rate": 9.941677604841696e-06, "loss": 0.6742, "step": 794 }, { "epoch": 0.07737226277372262, "grad_norm": 1.5933514264940258, "learning_rate": 9.9414373232494e-06, "loss": 0.5156, "step": 795 }, { "epoch": 0.07746958637469586, "grad_norm": 1.538651154827247, "learning_rate": 9.94119655062344e-06, "loss": 0.446, "step": 796 }, { "epoch": 0.0775669099756691, "grad_norm": 3.7300878200470926, "learning_rate": 9.94095528698774e-06, "loss": 0.2745, "step": 797 }, { "epoch": 0.07766423357664233, "grad_norm": 1.685774804326696, "learning_rate": 9.940713532366277e-06, "loss": 0.4236, "step": 798 }, { "epoch": 0.07776155717761557, "grad_norm": 1.2528388212678458, "learning_rate": 9.940471286783074e-06, "loss": 0.308, "step": 799 }, { "epoch": 0.07785888077858881, "grad_norm": 1.5082779398207746, "learning_rate": 9.940228550262203e-06, "loss": 0.4925, "step": 800 }, { "epoch": 0.07795620437956205, "grad_norm": 1.544326069333433, "learning_rate": 9.939985322827784e-06, "loss": 0.4341, "step": 801 }, { "epoch": 0.07805352798053528, "grad_norm": 1.4959220289677864, "learning_rate": 9.939741604503987e-06, "loss": 0.4548, "step": 802 }, { "epoch": 0.07815085158150852, "grad_norm": 1.682287714178995, "learning_rate": 9.93949739531503e-06, "loss": 0.5277, "step": 803 }, { "epoch": 0.07824817518248176, "grad_norm": 1.6519496438708445, "learning_rate": 9.93925269528518e-06, "loss": 0.3074, "step": 804 }, { "epoch": 0.07834549878345498, "grad_norm": 1.4379883641500402, "learning_rate": 9.939007504438756e-06, "loss": 0.3069, "step": 805 }, { "epoch": 0.07844282238442822, "grad_norm": 2.0644552037743793, "learning_rate": 9.93876182280012e-06, "loss": 0.4479, "step": 806 }, { "epoch": 0.07854014598540146, "grad_norm": 1.4791313310441092, "learning_rate": 9.938515650393685e-06, "loss": 0.4255, "step": 807 }, { "epoch": 0.0786374695863747, "grad_norm": 1.4280736600967436, "learning_rate": 9.938268987243914e-06, "loss": 0.466, "step": 808 }, { "epoch": 0.07873479318734793, "grad_norm": 1.610976672135659, "learning_rate": 9.93802183337532e-06, "loss": 0.4327, "step": 809 }, { "epoch": 0.07883211678832117, "grad_norm": 1.5447130604673693, "learning_rate": 9.93777418881246e-06, "loss": 0.4931, "step": 810 }, { "epoch": 0.0789294403892944, "grad_norm": 1.3831325957946852, "learning_rate": 9.937526053579944e-06, "loss": 0.3877, "step": 811 }, { "epoch": 0.07902676399026765, "grad_norm": 1.4247112282736865, "learning_rate": 9.93727742770243e-06, "loss": 0.4168, "step": 812 }, { "epoch": 0.07912408759124087, "grad_norm": 1.5074130304911886, "learning_rate": 9.937028311204624e-06, "loss": 0.4747, "step": 813 }, { "epoch": 0.07922141119221411, "grad_norm": 1.4955958242475926, "learning_rate": 9.936778704111278e-06, "loss": 0.2999, "step": 814 }, { "epoch": 0.07931873479318735, "grad_norm": 1.6038468607718186, "learning_rate": 9.9365286064472e-06, "loss": 0.4897, "step": 815 }, { "epoch": 0.07941605839416059, "grad_norm": 1.8040845780349017, "learning_rate": 9.93627801823724e-06, "loss": 0.6413, "step": 816 }, { "epoch": 0.07951338199513382, "grad_norm": 1.4598215502284355, "learning_rate": 9.936026939506298e-06, "loss": 0.3687, "step": 817 }, { "epoch": 0.07961070559610706, "grad_norm": 1.340412030499075, "learning_rate": 9.935775370279324e-06, "loss": 0.3833, "step": 818 }, { "epoch": 0.0797080291970803, "grad_norm": 1.6913032059853774, "learning_rate": 9.935523310581318e-06, "loss": 0.5857, "step": 819 }, { "epoch": 0.07980535279805352, "grad_norm": 1.9970663728185467, "learning_rate": 9.93527076043733e-06, "loss": 0.6843, "step": 820 }, { "epoch": 0.07990267639902676, "grad_norm": 1.4408921562941295, "learning_rate": 9.93501771987245e-06, "loss": 0.4385, "step": 821 }, { "epoch": 0.08, "grad_norm": 1.5184490203891443, "learning_rate": 9.934764188911827e-06, "loss": 0.4708, "step": 822 }, { "epoch": 0.08009732360097324, "grad_norm": 1.8501562903086661, "learning_rate": 9.934510167580654e-06, "loss": 0.6431, "step": 823 }, { "epoch": 0.08019464720194647, "grad_norm": 1.6997829158405129, "learning_rate": 9.934255655904172e-06, "loss": 0.5188, "step": 824 }, { "epoch": 0.08029197080291971, "grad_norm": 1.8510241792275326, "learning_rate": 9.934000653907674e-06, "loss": 0.5457, "step": 825 }, { "epoch": 0.08038929440389295, "grad_norm": 1.6853569692908912, "learning_rate": 9.933745161616498e-06, "loss": 0.5062, "step": 826 }, { "epoch": 0.08048661800486619, "grad_norm": 1.3066104263898661, "learning_rate": 9.93348917905603e-06, "loss": 0.404, "step": 827 }, { "epoch": 0.08058394160583941, "grad_norm": 1.2788244408859646, "learning_rate": 9.933232706251712e-06, "loss": 0.3253, "step": 828 }, { "epoch": 0.08068126520681265, "grad_norm": 2.2690800072126325, "learning_rate": 9.932975743229027e-06, "loss": 0.3405, "step": 829 }, { "epoch": 0.08077858880778589, "grad_norm": 1.9113871035353245, "learning_rate": 9.932718290013512e-06, "loss": 0.5989, "step": 830 }, { "epoch": 0.08087591240875912, "grad_norm": 1.3655256798283997, "learning_rate": 9.932460346630748e-06, "loss": 0.2942, "step": 831 }, { "epoch": 0.08097323600973236, "grad_norm": 1.5234864838378999, "learning_rate": 9.932201913106366e-06, "loss": 0.3913, "step": 832 }, { "epoch": 0.0810705596107056, "grad_norm": 1.3752195876516826, "learning_rate": 9.93194298946605e-06, "loss": 0.3293, "step": 833 }, { "epoch": 0.08116788321167884, "grad_norm": 1.4842622412969824, "learning_rate": 9.931683575735527e-06, "loss": 0.4157, "step": 834 }, { "epoch": 0.08126520681265206, "grad_norm": 4.003685207313109, "learning_rate": 9.931423671940577e-06, "loss": 0.3276, "step": 835 }, { "epoch": 0.0813625304136253, "grad_norm": 1.509943035011216, "learning_rate": 9.931163278107023e-06, "loss": 0.4045, "step": 836 }, { "epoch": 0.08145985401459854, "grad_norm": 1.4382523765338775, "learning_rate": 9.930902394260746e-06, "loss": 0.2709, "step": 837 }, { "epoch": 0.08155717761557178, "grad_norm": 1.4492711471586157, "learning_rate": 9.930641020427665e-06, "loss": 0.3957, "step": 838 }, { "epoch": 0.08165450121654501, "grad_norm": 1.7428876214187694, "learning_rate": 9.930379156633758e-06, "loss": 0.5257, "step": 839 }, { "epoch": 0.08175182481751825, "grad_norm": 1.5652514836380926, "learning_rate": 9.930116802905042e-06, "loss": 0.4948, "step": 840 }, { "epoch": 0.08184914841849149, "grad_norm": 2.4133112951540494, "learning_rate": 9.929853959267589e-06, "loss": 0.5455, "step": 841 }, { "epoch": 0.08194647201946471, "grad_norm": 1.4309460046419233, "learning_rate": 9.929590625747518e-06, "loss": 0.4057, "step": 842 }, { "epoch": 0.08204379562043795, "grad_norm": 1.0450296792009146, "learning_rate": 9.929326802370995e-06, "loss": 0.2332, "step": 843 }, { "epoch": 0.0821411192214112, "grad_norm": 1.1201933325217828, "learning_rate": 9.92906248916424e-06, "loss": 0.3264, "step": 844 }, { "epoch": 0.08223844282238443, "grad_norm": 1.6243579769967154, "learning_rate": 9.928797686153515e-06, "loss": 0.5385, "step": 845 }, { "epoch": 0.08233576642335766, "grad_norm": 1.3496069901220336, "learning_rate": 9.928532393365136e-06, "loss": 0.3875, "step": 846 }, { "epoch": 0.0824330900243309, "grad_norm": 1.4862888245769246, "learning_rate": 9.928266610825462e-06, "loss": 0.4493, "step": 847 }, { "epoch": 0.08253041362530414, "grad_norm": 1.8305160014899666, "learning_rate": 9.928000338560906e-06, "loss": 0.4582, "step": 848 }, { "epoch": 0.08262773722627738, "grad_norm": 1.642584946989029, "learning_rate": 9.927733576597926e-06, "loss": 0.3347, "step": 849 }, { "epoch": 0.0827250608272506, "grad_norm": 1.5413363162928122, "learning_rate": 9.927466324963033e-06, "loss": 0.4607, "step": 850 }, { "epoch": 0.08282238442822384, "grad_norm": 1.7093263469236866, "learning_rate": 9.927198583682784e-06, "loss": 0.5706, "step": 851 }, { "epoch": 0.08291970802919708, "grad_norm": 1.531714933227777, "learning_rate": 9.926930352783781e-06, "loss": 0.533, "step": 852 }, { "epoch": 0.08301703163017031, "grad_norm": 1.8181822267445191, "learning_rate": 9.926661632292683e-06, "loss": 0.5946, "step": 853 }, { "epoch": 0.08311435523114355, "grad_norm": 1.8304662465930317, "learning_rate": 9.926392422236189e-06, "loss": 0.3746, "step": 854 }, { "epoch": 0.08321167883211679, "grad_norm": 1.3135536142885351, "learning_rate": 9.926122722641051e-06, "loss": 0.429, "step": 855 }, { "epoch": 0.08330900243309003, "grad_norm": 1.714390027755308, "learning_rate": 9.925852533534071e-06, "loss": 0.6806, "step": 856 }, { "epoch": 0.08340632603406326, "grad_norm": 1.3399957064659453, "learning_rate": 9.925581854942099e-06, "loss": 0.2824, "step": 857 }, { "epoch": 0.0835036496350365, "grad_norm": 1.3705351036499993, "learning_rate": 9.925310686892026e-06, "loss": 0.3085, "step": 858 }, { "epoch": 0.08360097323600973, "grad_norm": 1.5064665959171673, "learning_rate": 9.925039029410807e-06, "loss": 0.4445, "step": 859 }, { "epoch": 0.08369829683698297, "grad_norm": 1.725614330530946, "learning_rate": 9.924766882525433e-06, "loss": 0.4704, "step": 860 }, { "epoch": 0.0837956204379562, "grad_norm": 1.765372064078189, "learning_rate": 9.924494246262944e-06, "loss": 0.6383, "step": 861 }, { "epoch": 0.08389294403892944, "grad_norm": 2.085503007877936, "learning_rate": 9.924221120650434e-06, "loss": 0.296, "step": 862 }, { "epoch": 0.08399026763990268, "grad_norm": 1.7898541160892734, "learning_rate": 9.923947505715046e-06, "loss": 0.5991, "step": 863 }, { "epoch": 0.0840875912408759, "grad_norm": 1.6476104975968628, "learning_rate": 9.923673401483968e-06, "loss": 0.4734, "step": 864 }, { "epoch": 0.08418491484184915, "grad_norm": 1.5502768976775265, "learning_rate": 9.923398807984439e-06, "loss": 0.2764, "step": 865 }, { "epoch": 0.08428223844282239, "grad_norm": 1.2398437846135097, "learning_rate": 9.923123725243744e-06, "loss": 0.2705, "step": 866 }, { "epoch": 0.08437956204379563, "grad_norm": 1.5290591078236662, "learning_rate": 9.922848153289217e-06, "loss": 0.4228, "step": 867 }, { "epoch": 0.08447688564476885, "grad_norm": 1.134889947118225, "learning_rate": 9.922572092148244e-06, "loss": 0.2953, "step": 868 }, { "epoch": 0.08457420924574209, "grad_norm": 1.6307620082274505, "learning_rate": 9.922295541848257e-06, "loss": 0.3363, "step": 869 }, { "epoch": 0.08467153284671533, "grad_norm": 1.373015271795792, "learning_rate": 9.922018502416736e-06, "loss": 0.3593, "step": 870 }, { "epoch": 0.08476885644768857, "grad_norm": 1.7500724096304088, "learning_rate": 9.921740973881211e-06, "loss": 0.5236, "step": 871 }, { "epoch": 0.0848661800486618, "grad_norm": 1.6167507595463353, "learning_rate": 9.92146295626926e-06, "loss": 0.5138, "step": 872 }, { "epoch": 0.08496350364963504, "grad_norm": 1.0398007401901226, "learning_rate": 9.92118444960851e-06, "loss": 0.295, "step": 873 }, { "epoch": 0.08506082725060828, "grad_norm": 1.4140920056378707, "learning_rate": 9.920905453926637e-06, "loss": 0.4192, "step": 874 }, { "epoch": 0.0851581508515815, "grad_norm": 1.8785238213855096, "learning_rate": 9.920625969251365e-06, "loss": 0.4228, "step": 875 }, { "epoch": 0.08525547445255474, "grad_norm": 1.719991686268608, "learning_rate": 9.920345995610465e-06, "loss": 0.5026, "step": 876 }, { "epoch": 0.08535279805352798, "grad_norm": 1.7112372148926476, "learning_rate": 9.92006553303176e-06, "loss": 0.3157, "step": 877 }, { "epoch": 0.08545012165450122, "grad_norm": 2.5105720144829116, "learning_rate": 9.919784581543117e-06, "loss": 0.4777, "step": 878 }, { "epoch": 0.08554744525547445, "grad_norm": 1.42848630379055, "learning_rate": 9.919503141172458e-06, "loss": 0.3998, "step": 879 }, { "epoch": 0.08564476885644769, "grad_norm": 1.4246136626839867, "learning_rate": 9.919221211947748e-06, "loss": 0.4415, "step": 880 }, { "epoch": 0.08574209245742093, "grad_norm": 1.939970471855472, "learning_rate": 9.918938793897002e-06, "loss": 0.5887, "step": 881 }, { "epoch": 0.08583941605839417, "grad_norm": 1.5467402852284964, "learning_rate": 9.918655887048285e-06, "loss": 0.3726, "step": 882 }, { "epoch": 0.08593673965936739, "grad_norm": 1.6261636529000345, "learning_rate": 9.918372491429708e-06, "loss": 0.3382, "step": 883 }, { "epoch": 0.08603406326034063, "grad_norm": 1.4859289768748727, "learning_rate": 9.918088607069434e-06, "loss": 0.4837, "step": 884 }, { "epoch": 0.08613138686131387, "grad_norm": 1.8534453271170916, "learning_rate": 9.917804233995673e-06, "loss": 0.5948, "step": 885 }, { "epoch": 0.08622871046228711, "grad_norm": 1.3491809126204122, "learning_rate": 9.917519372236684e-06, "loss": 0.381, "step": 886 }, { "epoch": 0.08632603406326034, "grad_norm": 1.4913268478302555, "learning_rate": 9.91723402182077e-06, "loss": 0.2872, "step": 887 }, { "epoch": 0.08642335766423358, "grad_norm": 1.5345667515291348, "learning_rate": 9.916948182776289e-06, "loss": 0.4426, "step": 888 }, { "epoch": 0.08652068126520682, "grad_norm": 1.9142340135608018, "learning_rate": 9.916661855131646e-06, "loss": 0.467, "step": 889 }, { "epoch": 0.08661800486618004, "grad_norm": 1.7451883652681546, "learning_rate": 9.916375038915291e-06, "loss": 0.3579, "step": 890 }, { "epoch": 0.08671532846715328, "grad_norm": 3.3675828599824618, "learning_rate": 9.916087734155728e-06, "loss": 0.3965, "step": 891 }, { "epoch": 0.08681265206812652, "grad_norm": 1.6430989821947144, "learning_rate": 9.915799940881504e-06, "loss": 0.5089, "step": 892 }, { "epoch": 0.08690997566909976, "grad_norm": 1.8434153107573372, "learning_rate": 9.915511659121219e-06, "loss": 0.6513, "step": 893 }, { "epoch": 0.08700729927007299, "grad_norm": 1.7259560464984558, "learning_rate": 9.91522288890352e-06, "loss": 0.5963, "step": 894 }, { "epoch": 0.08710462287104623, "grad_norm": 1.4417036209809253, "learning_rate": 9.9149336302571e-06, "loss": 0.4076, "step": 895 }, { "epoch": 0.08720194647201947, "grad_norm": 1.4565626930182671, "learning_rate": 9.914643883210704e-06, "loss": 0.3548, "step": 896 }, { "epoch": 0.08729927007299271, "grad_norm": 1.8286482885292266, "learning_rate": 9.914353647793126e-06, "loss": 0.5158, "step": 897 }, { "epoch": 0.08739659367396593, "grad_norm": 1.573235746781315, "learning_rate": 9.914062924033204e-06, "loss": 0.4804, "step": 898 }, { "epoch": 0.08749391727493917, "grad_norm": 1.7725042500734154, "learning_rate": 9.91377171195983e-06, "loss": 0.4037, "step": 899 }, { "epoch": 0.08759124087591241, "grad_norm": 1.5572801757524644, "learning_rate": 9.913480011601939e-06, "loss": 0.2757, "step": 900 }, { "epoch": 0.08768856447688564, "grad_norm": 1.690990088453521, "learning_rate": 9.91318782298852e-06, "loss": 0.624, "step": 901 }, { "epoch": 0.08778588807785888, "grad_norm": 1.5797017595834213, "learning_rate": 9.912895146148609e-06, "loss": 0.418, "step": 902 }, { "epoch": 0.08788321167883212, "grad_norm": 1.722754374021215, "learning_rate": 9.912601981111287e-06, "loss": 0.5991, "step": 903 }, { "epoch": 0.08798053527980536, "grad_norm": 1.2395740583484196, "learning_rate": 9.912308327905683e-06, "loss": 0.3632, "step": 904 }, { "epoch": 0.08807785888077858, "grad_norm": 1.8637568028899596, "learning_rate": 9.912014186560985e-06, "loss": 0.5766, "step": 905 }, { "epoch": 0.08817518248175182, "grad_norm": 1.8489319991981024, "learning_rate": 9.911719557106418e-06, "loss": 0.6834, "step": 906 }, { "epoch": 0.08827250608272506, "grad_norm": 1.6692858460733677, "learning_rate": 9.911424439571258e-06, "loss": 0.5067, "step": 907 }, { "epoch": 0.0883698296836983, "grad_norm": 1.4727605888984552, "learning_rate": 9.911128833984834e-06, "loss": 0.3141, "step": 908 }, { "epoch": 0.08846715328467153, "grad_norm": 1.644393806422472, "learning_rate": 9.910832740376518e-06, "loss": 0.4599, "step": 909 }, { "epoch": 0.08856447688564477, "grad_norm": 1.730275300452632, "learning_rate": 9.910536158775734e-06, "loss": 0.3908, "step": 910 }, { "epoch": 0.08866180048661801, "grad_norm": 1.7281903494262714, "learning_rate": 9.910239089211955e-06, "loss": 0.5919, "step": 911 }, { "epoch": 0.08875912408759123, "grad_norm": 1.7234172913238917, "learning_rate": 9.909941531714699e-06, "loss": 0.609, "step": 912 }, { "epoch": 0.08885644768856447, "grad_norm": 1.4594702058569258, "learning_rate": 9.909643486313533e-06, "loss": 0.4399, "step": 913 }, { "epoch": 0.08895377128953771, "grad_norm": 1.4625782448468165, "learning_rate": 9.90934495303808e-06, "loss": 0.4011, "step": 914 }, { "epoch": 0.08905109489051095, "grad_norm": 1.7262645481609784, "learning_rate": 9.909045931918e-06, "loss": 0.4992, "step": 915 }, { "epoch": 0.08914841849148418, "grad_norm": 1.6255222361700263, "learning_rate": 9.908746422983007e-06, "loss": 0.4909, "step": 916 }, { "epoch": 0.08924574209245742, "grad_norm": 1.7512982185254946, "learning_rate": 9.908446426262865e-06, "loss": 0.5527, "step": 917 }, { "epoch": 0.08934306569343066, "grad_norm": 1.617605772613541, "learning_rate": 9.908145941787386e-06, "loss": 0.3228, "step": 918 }, { "epoch": 0.0894403892944039, "grad_norm": 1.489706963519404, "learning_rate": 9.907844969586427e-06, "loss": 0.4838, "step": 919 }, { "epoch": 0.08953771289537713, "grad_norm": 1.193837371345013, "learning_rate": 9.907543509689896e-06, "loss": 0.284, "step": 920 }, { "epoch": 0.08963503649635036, "grad_norm": 1.5855787651349198, "learning_rate": 9.907241562127752e-06, "loss": 0.4641, "step": 921 }, { "epoch": 0.0897323600973236, "grad_norm": 1.2401284480478103, "learning_rate": 9.906939126929998e-06, "loss": 0.246, "step": 922 }, { "epoch": 0.08982968369829683, "grad_norm": 1.503842201355298, "learning_rate": 9.906636204126685e-06, "loss": 0.4031, "step": 923 }, { "epoch": 0.08992700729927007, "grad_norm": 1.9138265658958267, "learning_rate": 9.906332793747917e-06, "loss": 0.587, "step": 924 }, { "epoch": 0.09002433090024331, "grad_norm": 1.5381184892388742, "learning_rate": 9.906028895823844e-06, "loss": 0.4119, "step": 925 }, { "epoch": 0.09012165450121655, "grad_norm": 1.5769181877690257, "learning_rate": 9.905724510384664e-06, "loss": 0.4071, "step": 926 }, { "epoch": 0.09021897810218978, "grad_norm": 1.4644408625641083, "learning_rate": 9.905419637460625e-06, "loss": 0.3656, "step": 927 }, { "epoch": 0.09031630170316302, "grad_norm": 2.043739071504731, "learning_rate": 9.90511427708202e-06, "loss": 0.6317, "step": 928 }, { "epoch": 0.09041362530413626, "grad_norm": 1.8397228419915481, "learning_rate": 9.904808429279195e-06, "loss": 0.6656, "step": 929 }, { "epoch": 0.0905109489051095, "grad_norm": 1.6689588837493128, "learning_rate": 9.904502094082542e-06, "loss": 0.4603, "step": 930 }, { "epoch": 0.09060827250608272, "grad_norm": 1.7157610479724803, "learning_rate": 9.9041952715225e-06, "loss": 0.3566, "step": 931 }, { "epoch": 0.09070559610705596, "grad_norm": 1.5797548847560638, "learning_rate": 9.90388796162956e-06, "loss": 0.527, "step": 932 }, { "epoch": 0.0908029197080292, "grad_norm": 1.3861944362556795, "learning_rate": 9.903580164434262e-06, "loss": 0.3555, "step": 933 }, { "epoch": 0.09090024330900243, "grad_norm": 1.4873043668950738, "learning_rate": 9.903271879967185e-06, "loss": 0.3606, "step": 934 }, { "epoch": 0.09099756690997567, "grad_norm": 1.5471770637050817, "learning_rate": 9.90296310825897e-06, "loss": 0.5407, "step": 935 }, { "epoch": 0.0910948905109489, "grad_norm": 1.7410898214633266, "learning_rate": 9.902653849340296e-06, "loss": 0.5604, "step": 936 }, { "epoch": 0.09119221411192215, "grad_norm": 1.490257412993615, "learning_rate": 9.902344103241897e-06, "loss": 0.4293, "step": 937 }, { "epoch": 0.09128953771289537, "grad_norm": 1.3076716120407041, "learning_rate": 9.90203386999455e-06, "loss": 0.4311, "step": 938 }, { "epoch": 0.09138686131386861, "grad_norm": 1.63883307554104, "learning_rate": 9.901723149629085e-06, "loss": 0.5026, "step": 939 }, { "epoch": 0.09148418491484185, "grad_norm": 1.460694807977355, "learning_rate": 9.901411942176377e-06, "loss": 0.4449, "step": 940 }, { "epoch": 0.09158150851581509, "grad_norm": 1.631318499416747, "learning_rate": 9.901100247667352e-06, "loss": 0.4762, "step": 941 }, { "epoch": 0.09167883211678832, "grad_norm": 1.472942456024595, "learning_rate": 9.900788066132982e-06, "loss": 0.4208, "step": 942 }, { "epoch": 0.09177615571776156, "grad_norm": 1.9471723252943203, "learning_rate": 9.900475397604292e-06, "loss": 0.4887, "step": 943 }, { "epoch": 0.0918734793187348, "grad_norm": 1.4192635165617975, "learning_rate": 9.900162242112348e-06, "loss": 0.4753, "step": 944 }, { "epoch": 0.09197080291970802, "grad_norm": 1.7864248496903834, "learning_rate": 9.89984859968827e-06, "loss": 0.6063, "step": 945 }, { "epoch": 0.09206812652068126, "grad_norm": 1.402919088092856, "learning_rate": 9.899534470363225e-06, "loss": 0.3561, "step": 946 }, { "epoch": 0.0921654501216545, "grad_norm": 1.15011785152118, "learning_rate": 9.89921985416843e-06, "loss": 0.2605, "step": 947 }, { "epoch": 0.09226277372262774, "grad_norm": 1.2940536511249239, "learning_rate": 9.898904751135145e-06, "loss": 0.2503, "step": 948 }, { "epoch": 0.09236009732360097, "grad_norm": 1.5093308152075566, "learning_rate": 9.898589161294684e-06, "loss": 0.4185, "step": 949 }, { "epoch": 0.09245742092457421, "grad_norm": 1.5826010349075055, "learning_rate": 9.898273084678406e-06, "loss": 0.536, "step": 950 }, { "epoch": 0.09255474452554745, "grad_norm": 1.5672518381317015, "learning_rate": 9.897956521317724e-06, "loss": 0.5068, "step": 951 }, { "epoch": 0.09265206812652069, "grad_norm": 1.784767292144658, "learning_rate": 9.89763947124409e-06, "loss": 0.6601, "step": 952 }, { "epoch": 0.09274939172749391, "grad_norm": 1.620681747107968, "learning_rate": 9.897321934489011e-06, "loss": 0.5402, "step": 953 }, { "epoch": 0.09284671532846715, "grad_norm": 1.7479722673062432, "learning_rate": 9.897003911084042e-06, "loss": 0.6593, "step": 954 }, { "epoch": 0.09294403892944039, "grad_norm": 1.6618363798373263, "learning_rate": 9.896685401060783e-06, "loss": 0.6086, "step": 955 }, { "epoch": 0.09304136253041363, "grad_norm": 1.3782603882872615, "learning_rate": 9.896366404450888e-06, "loss": 0.3431, "step": 956 }, { "epoch": 0.09313868613138686, "grad_norm": 1.6607836446620106, "learning_rate": 9.896046921286053e-06, "loss": 0.4015, "step": 957 }, { "epoch": 0.0932360097323601, "grad_norm": 1.372535143543006, "learning_rate": 9.895726951598026e-06, "loss": 0.3627, "step": 958 }, { "epoch": 0.09333333333333334, "grad_norm": 1.965175835699204, "learning_rate": 9.895406495418602e-06, "loss": 0.434, "step": 959 }, { "epoch": 0.09343065693430656, "grad_norm": 1.6072227382486934, "learning_rate": 9.895085552779626e-06, "loss": 0.3666, "step": 960 }, { "epoch": 0.0935279805352798, "grad_norm": 1.8680414138630521, "learning_rate": 9.894764123712991e-06, "loss": 0.6182, "step": 961 }, { "epoch": 0.09362530413625304, "grad_norm": 1.7249394724081422, "learning_rate": 9.894442208250636e-06, "loss": 0.569, "step": 962 }, { "epoch": 0.09372262773722628, "grad_norm": 1.7887658285510963, "learning_rate": 9.894119806424549e-06, "loss": 0.4825, "step": 963 }, { "epoch": 0.09381995133819951, "grad_norm": 1.4470695743772581, "learning_rate": 9.89379691826677e-06, "loss": 0.4036, "step": 964 }, { "epoch": 0.09391727493917275, "grad_norm": 1.739037372856574, "learning_rate": 9.893473543809383e-06, "loss": 0.3734, "step": 965 }, { "epoch": 0.09401459854014599, "grad_norm": 1.2401623802615098, "learning_rate": 9.893149683084522e-06, "loss": 0.2892, "step": 966 }, { "epoch": 0.09411192214111923, "grad_norm": 1.632367817316159, "learning_rate": 9.892825336124369e-06, "loss": 0.3324, "step": 967 }, { "epoch": 0.09420924574209245, "grad_norm": 1.4553279790204596, "learning_rate": 9.892500502961156e-06, "loss": 0.4518, "step": 968 }, { "epoch": 0.0943065693430657, "grad_norm": 2.0184949211791867, "learning_rate": 9.892175183627161e-06, "loss": 0.496, "step": 969 }, { "epoch": 0.09440389294403893, "grad_norm": 1.3847811204395728, "learning_rate": 9.89184937815471e-06, "loss": 0.3908, "step": 970 }, { "epoch": 0.09450121654501216, "grad_norm": 1.7325451795183482, "learning_rate": 9.89152308657618e-06, "loss": 0.5813, "step": 971 }, { "epoch": 0.0945985401459854, "grad_norm": 1.3485480854398895, "learning_rate": 9.891196308923994e-06, "loss": 0.2773, "step": 972 }, { "epoch": 0.09469586374695864, "grad_norm": 1.6137214411092917, "learning_rate": 9.890869045230625e-06, "loss": 0.573, "step": 973 }, { "epoch": 0.09479318734793188, "grad_norm": 1.8098732560393935, "learning_rate": 9.890541295528593e-06, "loss": 0.5765, "step": 974 }, { "epoch": 0.0948905109489051, "grad_norm": 1.7169741386061155, "learning_rate": 9.890213059850467e-06, "loss": 0.5463, "step": 975 }, { "epoch": 0.09498783454987834, "grad_norm": 1.6226425677233698, "learning_rate": 9.889884338228861e-06, "loss": 0.459, "step": 976 }, { "epoch": 0.09508515815085158, "grad_norm": 1.5712338302132318, "learning_rate": 9.889555130696445e-06, "loss": 0.2926, "step": 977 }, { "epoch": 0.09518248175182482, "grad_norm": 2.368668096329164, "learning_rate": 9.88922543728593e-06, "loss": 0.4602, "step": 978 }, { "epoch": 0.09527980535279805, "grad_norm": 1.5481463515619227, "learning_rate": 9.888895258030077e-06, "loss": 0.382, "step": 979 }, { "epoch": 0.09537712895377129, "grad_norm": 1.5566394762827083, "learning_rate": 9.888564592961698e-06, "loss": 0.4432, "step": 980 }, { "epoch": 0.09547445255474453, "grad_norm": 1.2929219586068095, "learning_rate": 9.888233442113651e-06, "loss": 0.2986, "step": 981 }, { "epoch": 0.09557177615571776, "grad_norm": 1.7926346211976876, "learning_rate": 9.887901805518841e-06, "loss": 0.4536, "step": 982 }, { "epoch": 0.095669099756691, "grad_norm": 1.5810862037952855, "learning_rate": 9.887569683210225e-06, "loss": 0.5143, "step": 983 }, { "epoch": 0.09576642335766423, "grad_norm": 1.486412737689962, "learning_rate": 9.887237075220805e-06, "loss": 0.4422, "step": 984 }, { "epoch": 0.09586374695863747, "grad_norm": 1.5634292890846626, "learning_rate": 9.886903981583633e-06, "loss": 0.5158, "step": 985 }, { "epoch": 0.0959610705596107, "grad_norm": 1.4911106877832496, "learning_rate": 9.88657040233181e-06, "loss": 0.3584, "step": 986 }, { "epoch": 0.09605839416058394, "grad_norm": 1.8920202230134835, "learning_rate": 9.886236337498481e-06, "loss": 0.7059, "step": 987 }, { "epoch": 0.09615571776155718, "grad_norm": 1.9765830057761664, "learning_rate": 9.885901787116844e-06, "loss": 0.3363, "step": 988 }, { "epoch": 0.09625304136253042, "grad_norm": 1.7412713212065478, "learning_rate": 9.885566751220144e-06, "loss": 0.6238, "step": 989 }, { "epoch": 0.09635036496350365, "grad_norm": 1.4558500764026314, "learning_rate": 9.885231229841675e-06, "loss": 0.5033, "step": 990 }, { "epoch": 0.09644768856447689, "grad_norm": 1.5722863237428275, "learning_rate": 9.884895223014772e-06, "loss": 0.3026, "step": 991 }, { "epoch": 0.09654501216545013, "grad_norm": 1.7850396516814273, "learning_rate": 9.88455873077283e-06, "loss": 0.6797, "step": 992 }, { "epoch": 0.09664233576642335, "grad_norm": 1.5907642595826164, "learning_rate": 9.884221753149286e-06, "loss": 0.5051, "step": 993 }, { "epoch": 0.09673965936739659, "grad_norm": 1.383326117178851, "learning_rate": 9.883884290177623e-06, "loss": 0.394, "step": 994 }, { "epoch": 0.09683698296836983, "grad_norm": 1.5330791836349085, "learning_rate": 9.883546341891375e-06, "loss": 0.4531, "step": 995 }, { "epoch": 0.09693430656934307, "grad_norm": 1.3858453283442664, "learning_rate": 9.883207908324126e-06, "loss": 0.4674, "step": 996 }, { "epoch": 0.0970316301703163, "grad_norm": 1.2633519423598012, "learning_rate": 9.882868989509507e-06, "loss": 0.3053, "step": 997 }, { "epoch": 0.09712895377128954, "grad_norm": 1.5725755469000553, "learning_rate": 9.882529585481194e-06, "loss": 0.5382, "step": 998 }, { "epoch": 0.09722627737226278, "grad_norm": 1.594807816051373, "learning_rate": 9.882189696272916e-06, "loss": 0.5027, "step": 999 }, { "epoch": 0.09732360097323602, "grad_norm": 1.7855937930735857, "learning_rate": 9.881849321918446e-06, "loss": 0.6336, "step": 1000 }, { "epoch": 0.09742092457420924, "grad_norm": 1.8161736452208326, "learning_rate": 9.88150846245161e-06, "loss": 0.5432, "step": 1001 }, { "epoch": 0.09751824817518248, "grad_norm": 1.2323791206307224, "learning_rate": 9.881167117906276e-06, "loss": 0.3361, "step": 1002 }, { "epoch": 0.09761557177615572, "grad_norm": 1.6720448345305876, "learning_rate": 9.880825288316367e-06, "loss": 0.3583, "step": 1003 }, { "epoch": 0.09771289537712895, "grad_norm": 1.408364549926656, "learning_rate": 9.880482973715846e-06, "loss": 0.3847, "step": 1004 }, { "epoch": 0.09781021897810219, "grad_norm": 1.493256031544701, "learning_rate": 9.880140174138735e-06, "loss": 0.3611, "step": 1005 }, { "epoch": 0.09790754257907543, "grad_norm": 1.3658283125944337, "learning_rate": 9.879796889619093e-06, "loss": 0.3555, "step": 1006 }, { "epoch": 0.09800486618004867, "grad_norm": 1.7346143127846696, "learning_rate": 9.879453120191037e-06, "loss": 0.5028, "step": 1007 }, { "epoch": 0.09810218978102189, "grad_norm": 1.9094090784905724, "learning_rate": 9.879108865888724e-06, "loss": 0.4799, "step": 1008 }, { "epoch": 0.09819951338199513, "grad_norm": 1.1235415223499565, "learning_rate": 9.878764126746364e-06, "loss": 0.2181, "step": 1009 }, { "epoch": 0.09829683698296837, "grad_norm": 1.494557121918356, "learning_rate": 9.878418902798215e-06, "loss": 0.4548, "step": 1010 }, { "epoch": 0.09839416058394161, "grad_norm": 1.5340021274706077, "learning_rate": 9.87807319407858e-06, "loss": 0.4952, "step": 1011 }, { "epoch": 0.09849148418491484, "grad_norm": 1.2523545024978981, "learning_rate": 9.877727000621815e-06, "loss": 0.2887, "step": 1012 }, { "epoch": 0.09858880778588808, "grad_norm": 1.424446798325285, "learning_rate": 9.877380322462317e-06, "loss": 0.3628, "step": 1013 }, { "epoch": 0.09868613138686132, "grad_norm": 1.6382574528105933, "learning_rate": 9.877033159634542e-06, "loss": 0.5396, "step": 1014 }, { "epoch": 0.09878345498783454, "grad_norm": 1.544256440771578, "learning_rate": 9.876685512172982e-06, "loss": 0.4031, "step": 1015 }, { "epoch": 0.09888077858880778, "grad_norm": 1.620162733287423, "learning_rate": 9.876337380112185e-06, "loss": 0.4925, "step": 1016 }, { "epoch": 0.09897810218978102, "grad_norm": 1.6140460771461889, "learning_rate": 9.875988763486746e-06, "loss": 0.5549, "step": 1017 }, { "epoch": 0.09907542579075426, "grad_norm": 1.6187864498320685, "learning_rate": 9.875639662331307e-06, "loss": 0.5034, "step": 1018 }, { "epoch": 0.09917274939172749, "grad_norm": 1.249422512171971, "learning_rate": 9.875290076680557e-06, "loss": 0.236, "step": 1019 }, { "epoch": 0.09927007299270073, "grad_norm": 1.5835572971087337, "learning_rate": 9.874940006569236e-06, "loss": 0.5309, "step": 1020 }, { "epoch": 0.09936739659367397, "grad_norm": 0.8658795502351594, "learning_rate": 9.874589452032131e-06, "loss": 0.1911, "step": 1021 }, { "epoch": 0.09946472019464721, "grad_norm": 1.3171385587421753, "learning_rate": 9.874238413104076e-06, "loss": 0.3486, "step": 1022 }, { "epoch": 0.09956204379562043, "grad_norm": 1.4498439375980756, "learning_rate": 9.873886889819953e-06, "loss": 0.1986, "step": 1023 }, { "epoch": 0.09965936739659367, "grad_norm": 1.5991307847988792, "learning_rate": 9.873534882214692e-06, "loss": 0.6397, "step": 1024 }, { "epoch": 0.09975669099756691, "grad_norm": 1.6135151765084201, "learning_rate": 9.873182390323277e-06, "loss": 0.4338, "step": 1025 }, { "epoch": 0.09985401459854015, "grad_norm": 1.465261170994732, "learning_rate": 9.872829414180733e-06, "loss": 0.4692, "step": 1026 }, { "epoch": 0.09995133819951338, "grad_norm": 1.6964068418559575, "learning_rate": 9.872475953822134e-06, "loss": 0.4763, "step": 1027 }, { "epoch": 0.10004866180048662, "grad_norm": 1.5209137969308788, "learning_rate": 9.872122009282604e-06, "loss": 0.4266, "step": 1028 }, { "epoch": 0.10014598540145986, "grad_norm": 1.4495568716439686, "learning_rate": 9.871767580597316e-06, "loss": 0.4087, "step": 1029 }, { "epoch": 0.10024330900243308, "grad_norm": 1.344434785457905, "learning_rate": 9.871412667801488e-06, "loss": 0.3797, "step": 1030 }, { "epoch": 0.10034063260340632, "grad_norm": 1.5794908259633444, "learning_rate": 9.871057270930392e-06, "loss": 0.3939, "step": 1031 }, { "epoch": 0.10043795620437956, "grad_norm": 1.5876979734473795, "learning_rate": 9.870701390019337e-06, "loss": 0.484, "step": 1032 }, { "epoch": 0.1005352798053528, "grad_norm": 1.8773231101994967, "learning_rate": 9.870345025103694e-06, "loss": 0.5893, "step": 1033 }, { "epoch": 0.10063260340632603, "grad_norm": 1.4927383125242464, "learning_rate": 9.869988176218871e-06, "loss": 0.4138, "step": 1034 }, { "epoch": 0.10072992700729927, "grad_norm": 1.4766306382054422, "learning_rate": 9.869630843400331e-06, "loss": 0.4125, "step": 1035 }, { "epoch": 0.10082725060827251, "grad_norm": 2.1872385141217388, "learning_rate": 9.86927302668358e-06, "loss": 0.4581, "step": 1036 }, { "epoch": 0.10092457420924575, "grad_norm": 1.4275090865666056, "learning_rate": 9.868914726104174e-06, "loss": 0.2393, "step": 1037 }, { "epoch": 0.10102189781021897, "grad_norm": 1.6989614006808447, "learning_rate": 9.868555941697721e-06, "loss": 0.4941, "step": 1038 }, { "epoch": 0.10111922141119221, "grad_norm": 1.4357333730365565, "learning_rate": 9.86819667349987e-06, "loss": 0.4907, "step": 1039 }, { "epoch": 0.10121654501216545, "grad_norm": 2.0026376735495055, "learning_rate": 9.867836921546326e-06, "loss": 0.8695, "step": 1040 }, { "epoch": 0.10131386861313868, "grad_norm": 1.6951372609783342, "learning_rate": 9.867476685872833e-06, "loss": 0.6236, "step": 1041 }, { "epoch": 0.10141119221411192, "grad_norm": 1.6963236381946833, "learning_rate": 9.86711596651519e-06, "loss": 0.6358, "step": 1042 }, { "epoch": 0.10150851581508516, "grad_norm": 1.5189733584329748, "learning_rate": 9.866754763509242e-06, "loss": 0.4374, "step": 1043 }, { "epoch": 0.1016058394160584, "grad_norm": 1.2748045341406278, "learning_rate": 9.866393076890881e-06, "loss": 0.4213, "step": 1044 }, { "epoch": 0.10170316301703163, "grad_norm": 1.7405552081322075, "learning_rate": 9.866030906696051e-06, "loss": 0.6708, "step": 1045 }, { "epoch": 0.10180048661800487, "grad_norm": 1.3495682131815454, "learning_rate": 9.865668252960737e-06, "loss": 0.3531, "step": 1046 }, { "epoch": 0.1018978102189781, "grad_norm": 1.5653185028552046, "learning_rate": 9.86530511572098e-06, "loss": 0.4331, "step": 1047 }, { "epoch": 0.10199513381995134, "grad_norm": 1.3992858529840162, "learning_rate": 9.864941495012861e-06, "loss": 0.3388, "step": 1048 }, { "epoch": 0.10209245742092457, "grad_norm": 1.6270586325333123, "learning_rate": 9.864577390872516e-06, "loss": 0.4234, "step": 1049 }, { "epoch": 0.10218978102189781, "grad_norm": 1.8656971621974168, "learning_rate": 9.864212803336126e-06, "loss": 0.718, "step": 1050 }, { "epoch": 0.10228710462287105, "grad_norm": 1.4029758909387644, "learning_rate": 9.86384773243992e-06, "loss": 0.3892, "step": 1051 }, { "epoch": 0.10238442822384428, "grad_norm": 1.1023559958942302, "learning_rate": 9.863482178220176e-06, "loss": 0.2453, "step": 1052 }, { "epoch": 0.10248175182481752, "grad_norm": 1.5775869982106272, "learning_rate": 9.863116140713219e-06, "loss": 0.5324, "step": 1053 }, { "epoch": 0.10257907542579076, "grad_norm": 1.603675899324949, "learning_rate": 9.86274961995542e-06, "loss": 0.4521, "step": 1054 }, { "epoch": 0.102676399026764, "grad_norm": 1.6020699046167006, "learning_rate": 9.862382615983203e-06, "loss": 0.4545, "step": 1055 }, { "epoch": 0.10277372262773722, "grad_norm": 1.474718021659803, "learning_rate": 9.862015128833036e-06, "loss": 0.4822, "step": 1056 }, { "epoch": 0.10287104622871046, "grad_norm": 1.6033514684549, "learning_rate": 9.861647158541438e-06, "loss": 0.5069, "step": 1057 }, { "epoch": 0.1029683698296837, "grad_norm": 1.4841655382640788, "learning_rate": 9.861278705144974e-06, "loss": 0.3865, "step": 1058 }, { "epoch": 0.10306569343065694, "grad_norm": 1.1425556408878823, "learning_rate": 9.860909768680259e-06, "loss": 0.2443, "step": 1059 }, { "epoch": 0.10316301703163017, "grad_norm": 1.5288676978753954, "learning_rate": 9.86054034918395e-06, "loss": 0.3652, "step": 1060 }, { "epoch": 0.1032603406326034, "grad_norm": 1.5264484093473076, "learning_rate": 9.860170446692758e-06, "loss": 0.3318, "step": 1061 }, { "epoch": 0.10335766423357665, "grad_norm": 1.4476258605632986, "learning_rate": 9.859800061243443e-06, "loss": 0.4518, "step": 1062 }, { "epoch": 0.10345498783454987, "grad_norm": 1.336933590040686, "learning_rate": 9.859429192872809e-06, "loss": 0.2652, "step": 1063 }, { "epoch": 0.10355231143552311, "grad_norm": 1.6050187197155075, "learning_rate": 9.859057841617709e-06, "loss": 0.5383, "step": 1064 }, { "epoch": 0.10364963503649635, "grad_norm": 1.3472405276196469, "learning_rate": 9.858686007515045e-06, "loss": 0.4483, "step": 1065 }, { "epoch": 0.10374695863746959, "grad_norm": 1.4838970820374793, "learning_rate": 9.858313690601767e-06, "loss": 0.3506, "step": 1066 }, { "epoch": 0.10384428223844282, "grad_norm": 1.5911831099601979, "learning_rate": 9.857940890914868e-06, "loss": 0.3995, "step": 1067 }, { "epoch": 0.10394160583941606, "grad_norm": 1.415577451063168, "learning_rate": 9.8575676084914e-06, "loss": 0.4773, "step": 1068 }, { "epoch": 0.1040389294403893, "grad_norm": 1.7250253730787564, "learning_rate": 9.857193843368451e-06, "loss": 0.4456, "step": 1069 }, { "epoch": 0.10413625304136254, "grad_norm": 1.5066269873708278, "learning_rate": 9.856819595583166e-06, "loss": 0.5481, "step": 1070 }, { "epoch": 0.10423357664233576, "grad_norm": 1.5626665408071483, "learning_rate": 9.856444865172732e-06, "loss": 0.5382, "step": 1071 }, { "epoch": 0.104330900243309, "grad_norm": 1.9089561390061884, "learning_rate": 9.856069652174385e-06, "loss": 0.5533, "step": 1072 }, { "epoch": 0.10442822384428224, "grad_norm": 1.2757688373398666, "learning_rate": 9.855693956625414e-06, "loss": 0.3065, "step": 1073 }, { "epoch": 0.10452554744525547, "grad_norm": 1.7230598513214688, "learning_rate": 9.85531777856315e-06, "loss": 0.5367, "step": 1074 }, { "epoch": 0.10462287104622871, "grad_norm": 1.8368494244508635, "learning_rate": 9.854941118024973e-06, "loss": 0.4587, "step": 1075 }, { "epoch": 0.10472019464720195, "grad_norm": 1.418583003899538, "learning_rate": 9.854563975048314e-06, "loss": 0.405, "step": 1076 }, { "epoch": 0.10481751824817519, "grad_norm": 1.555078045275604, "learning_rate": 9.854186349670648e-06, "loss": 0.5572, "step": 1077 }, { "epoch": 0.10491484184914841, "grad_norm": 1.5414220083120458, "learning_rate": 9.853808241929502e-06, "loss": 0.3382, "step": 1078 }, { "epoch": 0.10501216545012165, "grad_norm": 1.2895897451723073, "learning_rate": 9.853429651862445e-06, "loss": 0.4342, "step": 1079 }, { "epoch": 0.10510948905109489, "grad_norm": 1.3117010773132232, "learning_rate": 9.853050579507104e-06, "loss": 0.3751, "step": 1080 }, { "epoch": 0.10520681265206813, "grad_norm": 1.5440994948167002, "learning_rate": 9.852671024901141e-06, "loss": 0.4971, "step": 1081 }, { "epoch": 0.10530413625304136, "grad_norm": 1.2028388141262132, "learning_rate": 9.852290988082278e-06, "loss": 0.3933, "step": 1082 }, { "epoch": 0.1054014598540146, "grad_norm": 1.6199890049219825, "learning_rate": 9.851910469088275e-06, "loss": 0.5394, "step": 1083 }, { "epoch": 0.10549878345498784, "grad_norm": 1.4805170620003079, "learning_rate": 9.851529467956946e-06, "loss": 0.2421, "step": 1084 }, { "epoch": 0.10559610705596106, "grad_norm": 1.432802486072686, "learning_rate": 9.851147984726154e-06, "loss": 0.479, "step": 1085 }, { "epoch": 0.1056934306569343, "grad_norm": 1.7662999036343905, "learning_rate": 9.850766019433803e-06, "loss": 0.706, "step": 1086 }, { "epoch": 0.10579075425790754, "grad_norm": 1.9136497208168854, "learning_rate": 9.850383572117853e-06, "loss": 0.7672, "step": 1087 }, { "epoch": 0.10588807785888078, "grad_norm": 1.1667281997438979, "learning_rate": 9.850000642816306e-06, "loss": 0.2263, "step": 1088 }, { "epoch": 0.10598540145985401, "grad_norm": 1.3133144576431575, "learning_rate": 9.849617231567213e-06, "loss": 0.2211, "step": 1089 }, { "epoch": 0.10608272506082725, "grad_norm": 1.411642205718121, "learning_rate": 9.849233338408674e-06, "loss": 0.4379, "step": 1090 }, { "epoch": 0.10618004866180049, "grad_norm": 1.7114110143353651, "learning_rate": 9.84884896337884e-06, "loss": 0.462, "step": 1091 }, { "epoch": 0.10627737226277373, "grad_norm": 1.4035875335457177, "learning_rate": 9.848464106515903e-06, "loss": 0.317, "step": 1092 }, { "epoch": 0.10637469586374695, "grad_norm": 1.5988244446936477, "learning_rate": 9.848078767858107e-06, "loss": 0.5254, "step": 1093 }, { "epoch": 0.1064720194647202, "grad_norm": 1.6336010940510732, "learning_rate": 9.847692947443745e-06, "loss": 0.4979, "step": 1094 }, { "epoch": 0.10656934306569343, "grad_norm": 1.68747146017171, "learning_rate": 9.847306645311154e-06, "loss": 0.5515, "step": 1095 }, { "epoch": 0.10666666666666667, "grad_norm": 1.497709273552353, "learning_rate": 9.846919861498724e-06, "loss": 0.4221, "step": 1096 }, { "epoch": 0.1067639902676399, "grad_norm": 1.4761873606313476, "learning_rate": 9.846532596044887e-06, "loss": 0.4296, "step": 1097 }, { "epoch": 0.10686131386861314, "grad_norm": 1.1441862868877024, "learning_rate": 9.846144848988127e-06, "loss": 0.2816, "step": 1098 }, { "epoch": 0.10695863746958638, "grad_norm": 1.7272604657837642, "learning_rate": 9.845756620366976e-06, "loss": 0.5916, "step": 1099 }, { "epoch": 0.1070559610705596, "grad_norm": 1.3799505412872324, "learning_rate": 9.84536791022001e-06, "loss": 0.3947, "step": 1100 }, { "epoch": 0.10715328467153284, "grad_norm": 1.6943818099878132, "learning_rate": 9.844978718585855e-06, "loss": 0.4737, "step": 1101 }, { "epoch": 0.10725060827250608, "grad_norm": 1.5405614688920448, "learning_rate": 9.84458904550319e-06, "loss": 0.4152, "step": 1102 }, { "epoch": 0.10734793187347932, "grad_norm": 1.6335292867295117, "learning_rate": 9.844198891010733e-06, "loss": 0.5677, "step": 1103 }, { "epoch": 0.10744525547445255, "grad_norm": 1.302603147379972, "learning_rate": 9.843808255147253e-06, "loss": 0.4283, "step": 1104 }, { "epoch": 0.10754257907542579, "grad_norm": 1.7967506033919078, "learning_rate": 9.84341713795157e-06, "loss": 0.6995, "step": 1105 }, { "epoch": 0.10763990267639903, "grad_norm": 1.7320527346822367, "learning_rate": 9.84302553946255e-06, "loss": 0.5369, "step": 1106 }, { "epoch": 0.10773722627737227, "grad_norm": 1.2124746103676287, "learning_rate": 9.842633459719104e-06, "loss": 0.296, "step": 1107 }, { "epoch": 0.1078345498783455, "grad_norm": 1.6638227119864655, "learning_rate": 9.842240898760195e-06, "loss": 0.5632, "step": 1108 }, { "epoch": 0.10793187347931874, "grad_norm": 1.5728826792836543, "learning_rate": 9.841847856624833e-06, "loss": 0.3407, "step": 1109 }, { "epoch": 0.10802919708029197, "grad_norm": 1.4855225795030034, "learning_rate": 9.841454333352073e-06, "loss": 0.534, "step": 1110 }, { "epoch": 0.1081265206812652, "grad_norm": 1.741747608159628, "learning_rate": 9.841060328981019e-06, "loss": 0.5739, "step": 1111 }, { "epoch": 0.10822384428223844, "grad_norm": 1.2765533148109443, "learning_rate": 9.840665843550825e-06, "loss": 0.335, "step": 1112 }, { "epoch": 0.10832116788321168, "grad_norm": 1.9391527817309226, "learning_rate": 9.840270877100692e-06, "loss": 0.5604, "step": 1113 }, { "epoch": 0.10841849148418492, "grad_norm": 1.2570937099076989, "learning_rate": 9.839875429669865e-06, "loss": 0.3098, "step": 1114 }, { "epoch": 0.10851581508515815, "grad_norm": 1.6345857910998665, "learning_rate": 9.839479501297643e-06, "loss": 0.4665, "step": 1115 }, { "epoch": 0.10861313868613139, "grad_norm": 2.1039943309751075, "learning_rate": 9.839083092023368e-06, "loss": 0.8597, "step": 1116 }, { "epoch": 0.10871046228710463, "grad_norm": 1.634678554608885, "learning_rate": 9.838686201886432e-06, "loss": 0.4907, "step": 1117 }, { "epoch": 0.10880778588807787, "grad_norm": 1.328229383966676, "learning_rate": 9.838288830926274e-06, "loss": 0.3255, "step": 1118 }, { "epoch": 0.10890510948905109, "grad_norm": 1.3587359099021656, "learning_rate": 9.837890979182381e-06, "loss": 0.4224, "step": 1119 }, { "epoch": 0.10900243309002433, "grad_norm": 1.6242900911620413, "learning_rate": 9.837492646694287e-06, "loss": 0.4338, "step": 1120 }, { "epoch": 0.10909975669099757, "grad_norm": 1.5901048900387273, "learning_rate": 9.837093833501576e-06, "loss": 0.5168, "step": 1121 }, { "epoch": 0.1091970802919708, "grad_norm": 1.34172908606168, "learning_rate": 9.836694539643878e-06, "loss": 0.3233, "step": 1122 }, { "epoch": 0.10929440389294404, "grad_norm": 1.4724714330159256, "learning_rate": 9.83629476516087e-06, "loss": 0.3652, "step": 1123 }, { "epoch": 0.10939172749391728, "grad_norm": 1.4884050773310515, "learning_rate": 9.835894510092279e-06, "loss": 0.4622, "step": 1124 }, { "epoch": 0.10948905109489052, "grad_norm": 1.3181328020728609, "learning_rate": 9.835493774477877e-06, "loss": 0.4531, "step": 1125 }, { "epoch": 0.10958637469586374, "grad_norm": 1.5414298966880746, "learning_rate": 9.835092558357488e-06, "loss": 0.3659, "step": 1126 }, { "epoch": 0.10968369829683698, "grad_norm": 1.3248299507567909, "learning_rate": 9.834690861770979e-06, "loss": 0.3207, "step": 1127 }, { "epoch": 0.10978102189781022, "grad_norm": 1.5527535683267375, "learning_rate": 9.834288684758269e-06, "loss": 0.4938, "step": 1128 }, { "epoch": 0.10987834549878346, "grad_norm": 1.3342131255187983, "learning_rate": 9.83388602735932e-06, "loss": 0.4451, "step": 1129 }, { "epoch": 0.10997566909975669, "grad_norm": 1.0500905202266426, "learning_rate": 9.833482889614143e-06, "loss": 0.2408, "step": 1130 }, { "epoch": 0.11007299270072993, "grad_norm": 1.377353907486564, "learning_rate": 9.833079271562802e-06, "loss": 0.3945, "step": 1131 }, { "epoch": 0.11017031630170317, "grad_norm": 1.5823324787969848, "learning_rate": 9.832675173245404e-06, "loss": 0.6066, "step": 1132 }, { "epoch": 0.11026763990267639, "grad_norm": 1.7266167679625446, "learning_rate": 9.832270594702102e-06, "loss": 0.6417, "step": 1133 }, { "epoch": 0.11036496350364963, "grad_norm": 1.4091165783577269, "learning_rate": 9.831865535973103e-06, "loss": 0.2661, "step": 1134 }, { "epoch": 0.11046228710462287, "grad_norm": 0.9959339686876645, "learning_rate": 9.831459997098654e-06, "loss": 0.1744, "step": 1135 }, { "epoch": 0.11055961070559611, "grad_norm": 1.4748243970921762, "learning_rate": 9.831053978119056e-06, "loss": 0.4011, "step": 1136 }, { "epoch": 0.11065693430656934, "grad_norm": 1.5879686249629044, "learning_rate": 9.830647479074656e-06, "loss": 0.3021, "step": 1137 }, { "epoch": 0.11075425790754258, "grad_norm": 1.5057704716227702, "learning_rate": 9.830240500005845e-06, "loss": 0.2962, "step": 1138 }, { "epoch": 0.11085158150851582, "grad_norm": 1.7497051535586357, "learning_rate": 9.829833040953068e-06, "loss": 0.4717, "step": 1139 }, { "epoch": 0.11094890510948906, "grad_norm": 1.7819946472609902, "learning_rate": 9.829425101956812e-06, "loss": 0.6113, "step": 1140 }, { "epoch": 0.11104622871046228, "grad_norm": 1.7680522472506797, "learning_rate": 9.829016683057615e-06, "loss": 0.4672, "step": 1141 }, { "epoch": 0.11114355231143552, "grad_norm": 1.8291787265156998, "learning_rate": 9.828607784296063e-06, "loss": 0.5148, "step": 1142 }, { "epoch": 0.11124087591240876, "grad_norm": 1.4119536127948566, "learning_rate": 9.828198405712788e-06, "loss": 0.2698, "step": 1143 }, { "epoch": 0.11133819951338199, "grad_norm": 1.67600232780131, "learning_rate": 9.827788547348469e-06, "loss": 0.4912, "step": 1144 }, { "epoch": 0.11143552311435523, "grad_norm": 1.9367616538665617, "learning_rate": 9.827378209243835e-06, "loss": 0.3781, "step": 1145 }, { "epoch": 0.11153284671532847, "grad_norm": 1.7032208896905794, "learning_rate": 9.826967391439662e-06, "loss": 0.5816, "step": 1146 }, { "epoch": 0.11163017031630171, "grad_norm": 1.60872896431165, "learning_rate": 9.826556093976769e-06, "loss": 0.4654, "step": 1147 }, { "epoch": 0.11172749391727493, "grad_norm": 1.5752275514466696, "learning_rate": 9.826144316896033e-06, "loss": 0.3177, "step": 1148 }, { "epoch": 0.11182481751824817, "grad_norm": 1.8207599924827627, "learning_rate": 9.82573206023837e-06, "loss": 0.5701, "step": 1149 }, { "epoch": 0.11192214111922141, "grad_norm": 1.5850279506541385, "learning_rate": 9.825319324044745e-06, "loss": 0.5616, "step": 1150 }, { "epoch": 0.11201946472019465, "grad_norm": 1.360496233978723, "learning_rate": 9.824906108356174e-06, "loss": 0.3407, "step": 1151 }, { "epoch": 0.11211678832116788, "grad_norm": 1.6595565610362801, "learning_rate": 9.824492413213717e-06, "loss": 0.6641, "step": 1152 }, { "epoch": 0.11221411192214112, "grad_norm": 1.6031792644515102, "learning_rate": 9.824078238658483e-06, "loss": 0.4779, "step": 1153 }, { "epoch": 0.11231143552311436, "grad_norm": 1.0762751645680708, "learning_rate": 9.82366358473163e-06, "loss": 0.2739, "step": 1154 }, { "epoch": 0.11240875912408758, "grad_norm": 1.3660129842713564, "learning_rate": 9.82324845147436e-06, "loss": 0.5043, "step": 1155 }, { "epoch": 0.11250608272506082, "grad_norm": 1.6273408315616833, "learning_rate": 9.822832838927929e-06, "loss": 0.6159, "step": 1156 }, { "epoch": 0.11260340632603406, "grad_norm": 1.4216921342906768, "learning_rate": 9.822416747133634e-06, "loss": 0.4093, "step": 1157 }, { "epoch": 0.1127007299270073, "grad_norm": 1.8899721642114575, "learning_rate": 9.822000176132822e-06, "loss": 0.5586, "step": 1158 }, { "epoch": 0.11279805352798053, "grad_norm": 1.5144459966059345, "learning_rate": 9.821583125966889e-06, "loss": 0.3806, "step": 1159 }, { "epoch": 0.11289537712895377, "grad_norm": 1.61041803725934, "learning_rate": 9.821165596677278e-06, "loss": 0.4064, "step": 1160 }, { "epoch": 0.11299270072992701, "grad_norm": 1.5410637406837986, "learning_rate": 9.820747588305477e-06, "loss": 0.3526, "step": 1161 }, { "epoch": 0.11309002433090025, "grad_norm": 1.5545393523360629, "learning_rate": 9.820329100893026e-06, "loss": 0.3834, "step": 1162 }, { "epoch": 0.11318734793187347, "grad_norm": 1.6391567381322345, "learning_rate": 9.819910134481508e-06, "loss": 0.3849, "step": 1163 }, { "epoch": 0.11328467153284671, "grad_norm": 1.5204183543600032, "learning_rate": 9.819490689112559e-06, "loss": 0.4712, "step": 1164 }, { "epoch": 0.11338199513381995, "grad_norm": 1.5168954302933022, "learning_rate": 9.819070764827858e-06, "loss": 0.4662, "step": 1165 }, { "epoch": 0.1134793187347932, "grad_norm": 1.4412304117107342, "learning_rate": 9.818650361669133e-06, "loss": 0.3515, "step": 1166 }, { "epoch": 0.11357664233576642, "grad_norm": 1.5419710047923603, "learning_rate": 9.81822947967816e-06, "loss": 0.383, "step": 1167 }, { "epoch": 0.11367396593673966, "grad_norm": 1.59211707141906, "learning_rate": 9.817808118896759e-06, "loss": 0.5101, "step": 1168 }, { "epoch": 0.1137712895377129, "grad_norm": 1.9315831066859817, "learning_rate": 9.817386279366808e-06, "loss": 0.6179, "step": 1169 }, { "epoch": 0.11386861313868613, "grad_norm": 1.3153157684002792, "learning_rate": 9.816963961130218e-06, "loss": 0.2382, "step": 1170 }, { "epoch": 0.11396593673965937, "grad_norm": 1.3579619945410324, "learning_rate": 9.81654116422896e-06, "loss": 0.4424, "step": 1171 }, { "epoch": 0.1140632603406326, "grad_norm": 1.479330223962703, "learning_rate": 9.816117888705046e-06, "loss": 0.3647, "step": 1172 }, { "epoch": 0.11416058394160584, "grad_norm": 1.5031676224913018, "learning_rate": 9.815694134600537e-06, "loss": 0.3686, "step": 1173 }, { "epoch": 0.11425790754257907, "grad_norm": 1.6106095254885215, "learning_rate": 9.815269901957543e-06, "loss": 0.5309, "step": 1174 }, { "epoch": 0.11435523114355231, "grad_norm": 1.4367590943688036, "learning_rate": 9.814845190818218e-06, "loss": 0.3786, "step": 1175 }, { "epoch": 0.11445255474452555, "grad_norm": 2.0513510648109636, "learning_rate": 9.814420001224767e-06, "loss": 0.8885, "step": 1176 }, { "epoch": 0.11454987834549879, "grad_norm": 1.3799990465326748, "learning_rate": 9.813994333219443e-06, "loss": 0.3511, "step": 1177 }, { "epoch": 0.11464720194647202, "grad_norm": 1.2354207015762353, "learning_rate": 9.813568186844541e-06, "loss": 0.3571, "step": 1178 }, { "epoch": 0.11474452554744526, "grad_norm": 2.0501383618438678, "learning_rate": 9.813141562142409e-06, "loss": 0.4485, "step": 1179 }, { "epoch": 0.1148418491484185, "grad_norm": 1.351584991091541, "learning_rate": 9.812714459155444e-06, "loss": 0.2894, "step": 1180 }, { "epoch": 0.11493917274939172, "grad_norm": 1.3568994189032655, "learning_rate": 9.812286877926085e-06, "loss": 0.4016, "step": 1181 }, { "epoch": 0.11503649635036496, "grad_norm": 1.4949546840268106, "learning_rate": 9.81185881849682e-06, "loss": 0.527, "step": 1182 }, { "epoch": 0.1151338199513382, "grad_norm": 1.5053242129518953, "learning_rate": 9.811430280910186e-06, "loss": 0.4324, "step": 1183 }, { "epoch": 0.11523114355231144, "grad_norm": 1.2995408017430223, "learning_rate": 9.811001265208768e-06, "loss": 0.4592, "step": 1184 }, { "epoch": 0.11532846715328467, "grad_norm": 1.4103061247668216, "learning_rate": 9.810571771435197e-06, "loss": 0.4615, "step": 1185 }, { "epoch": 0.1154257907542579, "grad_norm": 1.3694132099540144, "learning_rate": 9.810141799632153e-06, "loss": 0.4224, "step": 1186 }, { "epoch": 0.11552311435523115, "grad_norm": 1.4494836775882813, "learning_rate": 9.809711349842363e-06, "loss": 0.4189, "step": 1187 }, { "epoch": 0.11562043795620439, "grad_norm": 1.5100099037805617, "learning_rate": 9.809280422108598e-06, "loss": 0.495, "step": 1188 }, { "epoch": 0.11571776155717761, "grad_norm": 1.449093301695385, "learning_rate": 9.808849016473682e-06, "loss": 0.345, "step": 1189 }, { "epoch": 0.11581508515815085, "grad_norm": 1.501093862959825, "learning_rate": 9.808417132980484e-06, "loss": 0.4624, "step": 1190 }, { "epoch": 0.11591240875912409, "grad_norm": 1.4567657310588336, "learning_rate": 9.807984771671919e-06, "loss": 0.2836, "step": 1191 }, { "epoch": 0.11600973236009732, "grad_norm": 1.6666134190000732, "learning_rate": 9.807551932590952e-06, "loss": 0.3341, "step": 1192 }, { "epoch": 0.11610705596107056, "grad_norm": 1.7534770482902293, "learning_rate": 9.807118615780595e-06, "loss": 0.6021, "step": 1193 }, { "epoch": 0.1162043795620438, "grad_norm": 1.744738707996039, "learning_rate": 9.806684821283908e-06, "loss": 0.4593, "step": 1194 }, { "epoch": 0.11630170316301704, "grad_norm": 1.7519974888996959, "learning_rate": 9.806250549143994e-06, "loss": 0.5433, "step": 1195 }, { "epoch": 0.11639902676399026, "grad_norm": 1.6094009249182397, "learning_rate": 9.805815799404008e-06, "loss": 0.6053, "step": 1196 }, { "epoch": 0.1164963503649635, "grad_norm": 1.4291146386614342, "learning_rate": 9.805380572107153e-06, "loss": 0.4377, "step": 1197 }, { "epoch": 0.11659367396593674, "grad_norm": 1.6092739629047335, "learning_rate": 9.804944867296678e-06, "loss": 0.5708, "step": 1198 }, { "epoch": 0.11669099756690998, "grad_norm": 1.3856208861087336, "learning_rate": 9.804508685015876e-06, "loss": 0.3677, "step": 1199 }, { "epoch": 0.11678832116788321, "grad_norm": 1.52110832375871, "learning_rate": 9.804072025308096e-06, "loss": 0.3076, "step": 1200 }, { "epoch": 0.11688564476885645, "grad_norm": 1.3072729020716074, "learning_rate": 9.803634888216724e-06, "loss": 0.2673, "step": 1201 }, { "epoch": 0.11698296836982969, "grad_norm": 1.9045471339964295, "learning_rate": 9.8031972737852e-06, "loss": 0.7326, "step": 1202 }, { "epoch": 0.11708029197080291, "grad_norm": 1.3351659498760804, "learning_rate": 9.802759182057013e-06, "loss": 0.4193, "step": 1203 }, { "epoch": 0.11717761557177615, "grad_norm": 1.4664570380446003, "learning_rate": 9.80232061307569e-06, "loss": 0.358, "step": 1204 }, { "epoch": 0.11727493917274939, "grad_norm": 1.1764722042212887, "learning_rate": 9.80188156688482e-06, "loss": 0.3093, "step": 1205 }, { "epoch": 0.11737226277372263, "grad_norm": 1.5415184448059258, "learning_rate": 9.801442043528026e-06, "loss": 0.4667, "step": 1206 }, { "epoch": 0.11746958637469586, "grad_norm": 1.4827166479100118, "learning_rate": 9.801002043048984e-06, "loss": 0.4876, "step": 1207 }, { "epoch": 0.1175669099756691, "grad_norm": 1.6786553338713377, "learning_rate": 9.80056156549142e-06, "loss": 0.5076, "step": 1208 }, { "epoch": 0.11766423357664234, "grad_norm": 1.2161366736688597, "learning_rate": 9.8001206108991e-06, "loss": 0.2247, "step": 1209 }, { "epoch": 0.11776155717761558, "grad_norm": 1.4015628266094937, "learning_rate": 9.799679179315846e-06, "loss": 0.4327, "step": 1210 }, { "epoch": 0.1178588807785888, "grad_norm": 1.5420255844625947, "learning_rate": 9.799237270785522e-06, "loss": 0.438, "step": 1211 }, { "epoch": 0.11795620437956204, "grad_norm": 1.5978057716745744, "learning_rate": 9.79879488535204e-06, "loss": 0.4203, "step": 1212 }, { "epoch": 0.11805352798053528, "grad_norm": 1.8973070083198396, "learning_rate": 9.79835202305936e-06, "loss": 0.7404, "step": 1213 }, { "epoch": 0.11815085158150851, "grad_norm": 1.5331088091760856, "learning_rate": 9.797908683951492e-06, "loss": 0.5378, "step": 1214 }, { "epoch": 0.11824817518248175, "grad_norm": 1.9627839775910105, "learning_rate": 9.797464868072489e-06, "loss": 0.6298, "step": 1215 }, { "epoch": 0.11834549878345499, "grad_norm": 1.5059209630421948, "learning_rate": 9.797020575466452e-06, "loss": 0.4233, "step": 1216 }, { "epoch": 0.11844282238442823, "grad_norm": 1.4714593450262028, "learning_rate": 9.796575806177531e-06, "loss": 0.4078, "step": 1217 }, { "epoch": 0.11854014598540145, "grad_norm": 1.812199008547911, "learning_rate": 9.796130560249926e-06, "loss": 0.6636, "step": 1218 }, { "epoch": 0.1186374695863747, "grad_norm": 1.330364448549248, "learning_rate": 9.795684837727878e-06, "loss": 0.2597, "step": 1219 }, { "epoch": 0.11873479318734793, "grad_norm": 1.1642089342014024, "learning_rate": 9.795238638655681e-06, "loss": 0.2669, "step": 1220 }, { "epoch": 0.11883211678832117, "grad_norm": 1.0578785975666756, "learning_rate": 9.794791963077672e-06, "loss": 0.2138, "step": 1221 }, { "epoch": 0.1189294403892944, "grad_norm": 1.2810119779981208, "learning_rate": 9.794344811038239e-06, "loss": 0.3426, "step": 1222 }, { "epoch": 0.11902676399026764, "grad_norm": 1.6109574325023976, "learning_rate": 9.793897182581816e-06, "loss": 0.4931, "step": 1223 }, { "epoch": 0.11912408759124088, "grad_norm": 1.8314564663365431, "learning_rate": 9.793449077752882e-06, "loss": 0.5424, "step": 1224 }, { "epoch": 0.1192214111922141, "grad_norm": 1.3266514401224994, "learning_rate": 9.793000496595968e-06, "loss": 0.3123, "step": 1225 }, { "epoch": 0.11931873479318734, "grad_norm": 1.624792232435884, "learning_rate": 9.792551439155649e-06, "loss": 0.3635, "step": 1226 }, { "epoch": 0.11941605839416058, "grad_norm": 1.306535519875853, "learning_rate": 9.792101905476547e-06, "loss": 0.3252, "step": 1227 }, { "epoch": 0.11951338199513382, "grad_norm": 1.591218471169796, "learning_rate": 9.791651895603333e-06, "loss": 0.5493, "step": 1228 }, { "epoch": 0.11961070559610705, "grad_norm": 1.8218114354346657, "learning_rate": 9.791201409580725e-06, "loss": 0.6988, "step": 1229 }, { "epoch": 0.11970802919708029, "grad_norm": 1.7366783724272585, "learning_rate": 9.790750447453487e-06, "loss": 0.4285, "step": 1230 }, { "epoch": 0.11980535279805353, "grad_norm": 1.9439764988659998, "learning_rate": 9.790299009266434e-06, "loss": 0.2787, "step": 1231 }, { "epoch": 0.11990267639902677, "grad_norm": 1.4894849660267724, "learning_rate": 9.789847095064425e-06, "loss": 0.2531, "step": 1232 }, { "epoch": 0.12, "grad_norm": 1.6270936536604101, "learning_rate": 9.789394704892364e-06, "loss": 0.5309, "step": 1233 }, { "epoch": 0.12009732360097324, "grad_norm": 1.4144832764840753, "learning_rate": 9.788941838795209e-06, "loss": 0.298, "step": 1234 }, { "epoch": 0.12019464720194648, "grad_norm": 1.546926786538444, "learning_rate": 9.788488496817958e-06, "loss": 0.4751, "step": 1235 }, { "epoch": 0.12029197080291971, "grad_norm": 1.5827216255031866, "learning_rate": 9.788034679005664e-06, "loss": 0.4576, "step": 1236 }, { "epoch": 0.12038929440389294, "grad_norm": 1.6103699210596951, "learning_rate": 9.78758038540342e-06, "loss": 0.4637, "step": 1237 }, { "epoch": 0.12048661800486618, "grad_norm": 1.4918367462943103, "learning_rate": 9.78712561605637e-06, "loss": 0.4998, "step": 1238 }, { "epoch": 0.12058394160583942, "grad_norm": 1.5775409788682337, "learning_rate": 9.786670371009706e-06, "loss": 0.4415, "step": 1239 }, { "epoch": 0.12068126520681265, "grad_norm": 1.5427286854632911, "learning_rate": 9.786214650308666e-06, "loss": 0.4606, "step": 1240 }, { "epoch": 0.12077858880778589, "grad_norm": 1.523821034203494, "learning_rate": 9.78575845399853e-06, "loss": 0.3918, "step": 1241 }, { "epoch": 0.12087591240875913, "grad_norm": 1.950297391662121, "learning_rate": 9.785301782124638e-06, "loss": 0.5579, "step": 1242 }, { "epoch": 0.12097323600973237, "grad_norm": 1.5957141815138678, "learning_rate": 9.784844634732367e-06, "loss": 0.3814, "step": 1243 }, { "epoch": 0.12107055961070559, "grad_norm": 1.3924341327971197, "learning_rate": 9.784387011867145e-06, "loss": 0.3576, "step": 1244 }, { "epoch": 0.12116788321167883, "grad_norm": 1.670661057733516, "learning_rate": 9.783928913574442e-06, "loss": 0.5307, "step": 1245 }, { "epoch": 0.12126520681265207, "grad_norm": 1.9162789104592521, "learning_rate": 9.783470339899783e-06, "loss": 0.2309, "step": 1246 }, { "epoch": 0.12136253041362531, "grad_norm": 1.4323883393925967, "learning_rate": 9.783011290888737e-06, "loss": 0.4816, "step": 1247 }, { "epoch": 0.12145985401459854, "grad_norm": 1.133557304990043, "learning_rate": 9.78255176658692e-06, "loss": 0.259, "step": 1248 }, { "epoch": 0.12155717761557178, "grad_norm": 1.6381613262272003, "learning_rate": 9.782091767039992e-06, "loss": 0.535, "step": 1249 }, { "epoch": 0.12165450121654502, "grad_norm": 1.521879132713644, "learning_rate": 9.781631292293668e-06, "loss": 0.5299, "step": 1250 }, { "epoch": 0.12175182481751824, "grad_norm": 1.2965362290198492, "learning_rate": 9.781170342393702e-06, "loss": 0.4161, "step": 1251 }, { "epoch": 0.12184914841849148, "grad_norm": 1.4753461399295356, "learning_rate": 9.780708917385901e-06, "loss": 0.5379, "step": 1252 }, { "epoch": 0.12194647201946472, "grad_norm": 0.9509628974965367, "learning_rate": 9.780247017316115e-06, "loss": 0.2681, "step": 1253 }, { "epoch": 0.12204379562043796, "grad_norm": 1.3308735848114122, "learning_rate": 9.779784642230246e-06, "loss": 0.4247, "step": 1254 }, { "epoch": 0.12214111922141119, "grad_norm": 1.1206835484781008, "learning_rate": 9.779321792174239e-06, "loss": 0.2301, "step": 1255 }, { "epoch": 0.12223844282238443, "grad_norm": 1.2598096263209464, "learning_rate": 9.778858467194087e-06, "loss": 0.3163, "step": 1256 }, { "epoch": 0.12233576642335767, "grad_norm": 1.4871998460052394, "learning_rate": 9.778394667335834e-06, "loss": 0.3433, "step": 1257 }, { "epoch": 0.1224330900243309, "grad_norm": 1.384245738588718, "learning_rate": 9.777930392645565e-06, "loss": 0.2111, "step": 1258 }, { "epoch": 0.12253041362530413, "grad_norm": 1.4369061113982475, "learning_rate": 9.777465643169417e-06, "loss": 0.3895, "step": 1259 }, { "epoch": 0.12262773722627737, "grad_norm": 1.8558638944994366, "learning_rate": 9.777000418953568e-06, "loss": 0.3388, "step": 1260 }, { "epoch": 0.12272506082725061, "grad_norm": 1.512984108492842, "learning_rate": 9.776534720044255e-06, "loss": 0.4726, "step": 1261 }, { "epoch": 0.12282238442822384, "grad_norm": 1.367540412040702, "learning_rate": 9.77606854648775e-06, "loss": 0.2684, "step": 1262 }, { "epoch": 0.12291970802919708, "grad_norm": 1.2042550068870583, "learning_rate": 9.775601898330377e-06, "loss": 0.2173, "step": 1263 }, { "epoch": 0.12301703163017032, "grad_norm": 1.5842484372844456, "learning_rate": 9.775134775618509e-06, "loss": 0.5608, "step": 1264 }, { "epoch": 0.12311435523114356, "grad_norm": 1.397447971201202, "learning_rate": 9.774667178398562e-06, "loss": 0.4632, "step": 1265 }, { "epoch": 0.12321167883211678, "grad_norm": 1.3468996882112099, "learning_rate": 9.774199106717004e-06, "loss": 0.3697, "step": 1266 }, { "epoch": 0.12330900243309002, "grad_norm": 1.252677053550249, "learning_rate": 9.773730560620345e-06, "loss": 0.2377, "step": 1267 }, { "epoch": 0.12340632603406326, "grad_norm": 1.4179546260918483, "learning_rate": 9.773261540155148e-06, "loss": 0.4857, "step": 1268 }, { "epoch": 0.1235036496350365, "grad_norm": 1.3092572252570605, "learning_rate": 9.772792045368015e-06, "loss": 0.2969, "step": 1269 }, { "epoch": 0.12360097323600973, "grad_norm": 1.7901486760202572, "learning_rate": 9.772322076305607e-06, "loss": 0.6935, "step": 1270 }, { "epoch": 0.12369829683698297, "grad_norm": 1.5982523135009328, "learning_rate": 9.771851633014618e-06, "loss": 0.4368, "step": 1271 }, { "epoch": 0.12379562043795621, "grad_norm": 1.195950207110724, "learning_rate": 9.7713807155418e-06, "loss": 0.3202, "step": 1272 }, { "epoch": 0.12389294403892943, "grad_norm": 1.352519407714817, "learning_rate": 9.770909323933947e-06, "loss": 0.4284, "step": 1273 }, { "epoch": 0.12399026763990267, "grad_norm": 1.4231425912579843, "learning_rate": 9.770437458237903e-06, "loss": 0.434, "step": 1274 }, { "epoch": 0.12408759124087591, "grad_norm": 1.2825234760121222, "learning_rate": 9.769965118500555e-06, "loss": 0.3817, "step": 1275 }, { "epoch": 0.12418491484184915, "grad_norm": 1.8250797045299043, "learning_rate": 9.769492304768843e-06, "loss": 0.7366, "step": 1276 }, { "epoch": 0.12428223844282238, "grad_norm": 1.3974167065714918, "learning_rate": 9.769019017089748e-06, "loss": 0.2804, "step": 1277 }, { "epoch": 0.12437956204379562, "grad_norm": 1.2933017267383033, "learning_rate": 9.768545255510302e-06, "loss": 0.3495, "step": 1278 }, { "epoch": 0.12447688564476886, "grad_norm": 1.2423501538003798, "learning_rate": 9.768071020077584e-06, "loss": 0.2908, "step": 1279 }, { "epoch": 0.1245742092457421, "grad_norm": 1.8228975858143868, "learning_rate": 9.767596310838718e-06, "loss": 0.4222, "step": 1280 }, { "epoch": 0.12467153284671532, "grad_norm": 1.5510872411682606, "learning_rate": 9.767121127840874e-06, "loss": 0.5058, "step": 1281 }, { "epoch": 0.12476885644768856, "grad_norm": 1.6665778692750302, "learning_rate": 9.766645471131278e-06, "loss": 0.3592, "step": 1282 }, { "epoch": 0.1248661800486618, "grad_norm": 1.5396481092124317, "learning_rate": 9.766169340757187e-06, "loss": 0.2737, "step": 1283 }, { "epoch": 0.12496350364963503, "grad_norm": 1.5555229817491858, "learning_rate": 9.765692736765922e-06, "loss": 0.5466, "step": 1284 }, { "epoch": 0.12506082725060827, "grad_norm": 1.5351601326386175, "learning_rate": 9.765215659204838e-06, "loss": 0.4733, "step": 1285 }, { "epoch": 0.1251581508515815, "grad_norm": 1.2793773363741519, "learning_rate": 9.764738108121347e-06, "loss": 0.3056, "step": 1286 }, { "epoch": 0.12525547445255475, "grad_norm": 1.6331577939793205, "learning_rate": 9.764260083562902e-06, "loss": 0.5883, "step": 1287 }, { "epoch": 0.12535279805352798, "grad_norm": 1.3363728845544067, "learning_rate": 9.763781585577003e-06, "loss": 0.2904, "step": 1288 }, { "epoch": 0.12545012165450123, "grad_norm": 1.360818732035961, "learning_rate": 9.763302614211199e-06, "loss": 0.4202, "step": 1289 }, { "epoch": 0.12554744525547445, "grad_norm": 1.3103877737057137, "learning_rate": 9.762823169513089e-06, "loss": 0.4694, "step": 1290 }, { "epoch": 0.12564476885644768, "grad_norm": 1.1848446118808063, "learning_rate": 9.76234325153031e-06, "loss": 0.2265, "step": 1291 }, { "epoch": 0.12574209245742093, "grad_norm": 1.3494947194310234, "learning_rate": 9.761862860310558e-06, "loss": 0.2382, "step": 1292 }, { "epoch": 0.12583941605839416, "grad_norm": 1.7062717031139596, "learning_rate": 9.761381995901564e-06, "loss": 0.7254, "step": 1293 }, { "epoch": 0.12593673965936739, "grad_norm": 1.208337515242783, "learning_rate": 9.760900658351117e-06, "loss": 0.326, "step": 1294 }, { "epoch": 0.12603406326034064, "grad_norm": 1.3159841432369768, "learning_rate": 9.760418847707043e-06, "loss": 0.3438, "step": 1295 }, { "epoch": 0.12613138686131387, "grad_norm": 1.3809255020300633, "learning_rate": 9.759936564017223e-06, "loss": 0.2716, "step": 1296 }, { "epoch": 0.1262287104622871, "grad_norm": 1.3382917039666673, "learning_rate": 9.759453807329582e-06, "loss": 0.2882, "step": 1297 }, { "epoch": 0.12632603406326035, "grad_norm": 1.3572918507167704, "learning_rate": 9.75897057769209e-06, "loss": 0.4181, "step": 1298 }, { "epoch": 0.12642335766423357, "grad_norm": 1.4433440128897468, "learning_rate": 9.758486875152766e-06, "loss": 0.4883, "step": 1299 }, { "epoch": 0.12652068126520682, "grad_norm": 1.1934091211117765, "learning_rate": 9.758002699759677e-06, "loss": 0.3828, "step": 1300 }, { "epoch": 0.12661800486618005, "grad_norm": 1.4647925609545562, "learning_rate": 9.757518051560935e-06, "loss": 0.402, "step": 1301 }, { "epoch": 0.12671532846715328, "grad_norm": 1.658517832372951, "learning_rate": 9.7570329306047e-06, "loss": 0.6752, "step": 1302 }, { "epoch": 0.12681265206812653, "grad_norm": 1.2682494280043264, "learning_rate": 9.75654733693918e-06, "loss": 0.2786, "step": 1303 }, { "epoch": 0.12690997566909976, "grad_norm": 1.3919267883395627, "learning_rate": 9.756061270612625e-06, "loss": 0.4806, "step": 1304 }, { "epoch": 0.12700729927007298, "grad_norm": 1.160118847382142, "learning_rate": 9.75557473167334e-06, "loss": 0.2458, "step": 1305 }, { "epoch": 0.12710462287104624, "grad_norm": 1.482640427472728, "learning_rate": 9.755087720169672e-06, "loss": 0.527, "step": 1306 }, { "epoch": 0.12720194647201946, "grad_norm": 1.5068875178509769, "learning_rate": 9.75460023615001e-06, "loss": 0.4985, "step": 1307 }, { "epoch": 0.1272992700729927, "grad_norm": 1.2878541774064265, "learning_rate": 9.754112279662805e-06, "loss": 0.3478, "step": 1308 }, { "epoch": 0.12739659367396594, "grad_norm": 1.1398490461157162, "learning_rate": 9.75362385075654e-06, "loss": 0.3084, "step": 1309 }, { "epoch": 0.12749391727493917, "grad_norm": 1.2924420070365765, "learning_rate": 9.75313494947975e-06, "loss": 0.3919, "step": 1310 }, { "epoch": 0.12759124087591242, "grad_norm": 1.4558696462945964, "learning_rate": 9.752645575881018e-06, "loss": 0.225, "step": 1311 }, { "epoch": 0.12768856447688565, "grad_norm": 1.677251779693783, "learning_rate": 9.752155730008974e-06, "loss": 0.4831, "step": 1312 }, { "epoch": 0.12778588807785887, "grad_norm": 1.3350720195417478, "learning_rate": 9.751665411912294e-06, "loss": 0.4371, "step": 1313 }, { "epoch": 0.12788321167883213, "grad_norm": 1.3653039655289896, "learning_rate": 9.751174621639702e-06, "loss": 0.4051, "step": 1314 }, { "epoch": 0.12798053527980535, "grad_norm": 2.0214110135389927, "learning_rate": 9.75068335923997e-06, "loss": 0.4971, "step": 1315 }, { "epoch": 0.12807785888077858, "grad_norm": 1.7144522600221743, "learning_rate": 9.750191624761909e-06, "loss": 0.6353, "step": 1316 }, { "epoch": 0.12817518248175183, "grad_norm": 1.61491787633751, "learning_rate": 9.749699418254388e-06, "loss": 0.5408, "step": 1317 }, { "epoch": 0.12827250608272506, "grad_norm": 1.3029361322695596, "learning_rate": 9.749206739766317e-06, "loss": 0.407, "step": 1318 }, { "epoch": 0.12836982968369828, "grad_norm": 1.2453189940624274, "learning_rate": 9.748713589346652e-06, "loss": 0.3254, "step": 1319 }, { "epoch": 0.12846715328467154, "grad_norm": 1.4117795102544664, "learning_rate": 9.748219967044398e-06, "loss": 0.3941, "step": 1320 }, { "epoch": 0.12856447688564476, "grad_norm": 1.4197813276706028, "learning_rate": 9.74772587290861e-06, "loss": 0.3454, "step": 1321 }, { "epoch": 0.12866180048661802, "grad_norm": 1.3133599325252279, "learning_rate": 9.747231306988381e-06, "loss": 0.3389, "step": 1322 }, { "epoch": 0.12875912408759124, "grad_norm": 1.3432229805022793, "learning_rate": 9.746736269332861e-06, "loss": 0.469, "step": 1323 }, { "epoch": 0.12885644768856447, "grad_norm": 1.1244292400820686, "learning_rate": 9.746240759991241e-06, "loss": 0.3674, "step": 1324 }, { "epoch": 0.12895377128953772, "grad_norm": 1.4966792860681473, "learning_rate": 9.745744779012758e-06, "loss": 0.4308, "step": 1325 }, { "epoch": 0.12905109489051095, "grad_norm": 1.5238028846181695, "learning_rate": 9.745248326446699e-06, "loss": 0.4213, "step": 1326 }, { "epoch": 0.12914841849148417, "grad_norm": 1.3633303920337936, "learning_rate": 9.744751402342398e-06, "loss": 0.438, "step": 1327 }, { "epoch": 0.12924574209245743, "grad_norm": 1.3260493495785517, "learning_rate": 9.744254006749235e-06, "loss": 0.4762, "step": 1328 }, { "epoch": 0.12934306569343065, "grad_norm": 1.705738477220435, "learning_rate": 9.743756139716634e-06, "loss": 0.5861, "step": 1329 }, { "epoch": 0.12944038929440388, "grad_norm": 1.5829201544013396, "learning_rate": 9.743257801294069e-06, "loss": 0.469, "step": 1330 }, { "epoch": 0.12953771289537713, "grad_norm": 1.1445128143179795, "learning_rate": 9.74275899153106e-06, "loss": 0.4018, "step": 1331 }, { "epoch": 0.12963503649635036, "grad_norm": 1.2900129109113572, "learning_rate": 9.742259710477178e-06, "loss": 0.3802, "step": 1332 }, { "epoch": 0.1297323600973236, "grad_norm": 1.3212461161488713, "learning_rate": 9.74175995818203e-06, "loss": 0.3725, "step": 1333 }, { "epoch": 0.12982968369829684, "grad_norm": 1.3979706650986563, "learning_rate": 9.741259734695283e-06, "loss": 0.3961, "step": 1334 }, { "epoch": 0.12992700729927006, "grad_norm": 1.2642819849441118, "learning_rate": 9.740759040066642e-06, "loss": 0.3528, "step": 1335 }, { "epoch": 0.13002433090024332, "grad_norm": 1.7776493019463793, "learning_rate": 9.74025787434586e-06, "loss": 0.6424, "step": 1336 }, { "epoch": 0.13012165450121654, "grad_norm": 1.1885806737857232, "learning_rate": 9.73975623758274e-06, "loss": 0.3163, "step": 1337 }, { "epoch": 0.13021897810218977, "grad_norm": 1.7443954093720497, "learning_rate": 9.739254129827131e-06, "loss": 0.7263, "step": 1338 }, { "epoch": 0.13031630170316302, "grad_norm": 1.7005058938305366, "learning_rate": 9.738751551128924e-06, "loss": 0.5204, "step": 1339 }, { "epoch": 0.13041362530413625, "grad_norm": 6.598521165184121, "learning_rate": 9.738248501538063e-06, "loss": 0.5113, "step": 1340 }, { "epoch": 0.1305109489051095, "grad_norm": 1.6203066466178853, "learning_rate": 9.737744981104536e-06, "loss": 0.625, "step": 1341 }, { "epoch": 0.13060827250608273, "grad_norm": 1.548111392574701, "learning_rate": 9.73724098987838e-06, "loss": 0.3952, "step": 1342 }, { "epoch": 0.13070559610705595, "grad_norm": 1.4871418112966692, "learning_rate": 9.736736527909674e-06, "loss": 0.5084, "step": 1343 }, { "epoch": 0.1308029197080292, "grad_norm": 1.0723677900938815, "learning_rate": 9.736231595248546e-06, "loss": 0.255, "step": 1344 }, { "epoch": 0.13090024330900243, "grad_norm": 1.5695490713137843, "learning_rate": 9.735726191945176e-06, "loss": 0.3438, "step": 1345 }, { "epoch": 0.13099756690997566, "grad_norm": 2.059617079542521, "learning_rate": 9.73522031804978e-06, "loss": 0.5249, "step": 1346 }, { "epoch": 0.1310948905109489, "grad_norm": 1.5301765260275246, "learning_rate": 9.734713973612633e-06, "loss": 0.3667, "step": 1347 }, { "epoch": 0.13119221411192214, "grad_norm": 1.7431028553023509, "learning_rate": 9.734207158684048e-06, "loss": 0.5551, "step": 1348 }, { "epoch": 0.13128953771289537, "grad_norm": 1.2916959738739295, "learning_rate": 9.733699873314388e-06, "loss": 0.278, "step": 1349 }, { "epoch": 0.13138686131386862, "grad_norm": 1.5891072584842363, "learning_rate": 9.733192117554062e-06, "loss": 0.4139, "step": 1350 }, { "epoch": 0.13148418491484185, "grad_norm": 1.6366778166029219, "learning_rate": 9.732683891453528e-06, "loss": 0.4888, "step": 1351 }, { "epoch": 0.1315815085158151, "grad_norm": 1.6763551525158185, "learning_rate": 9.732175195063283e-06, "loss": 0.5432, "step": 1352 }, { "epoch": 0.13167883211678832, "grad_norm": 1.551593865483807, "learning_rate": 9.731666028433882e-06, "loss": 0.5634, "step": 1353 }, { "epoch": 0.13177615571776155, "grad_norm": 1.693219206573502, "learning_rate": 9.731156391615919e-06, "loss": 0.4554, "step": 1354 }, { "epoch": 0.1318734793187348, "grad_norm": 1.4894832853139421, "learning_rate": 9.730646284660037e-06, "loss": 0.4286, "step": 1355 }, { "epoch": 0.13197080291970803, "grad_norm": 1.20058966692396, "learning_rate": 9.730135707616927e-06, "loss": 0.2519, "step": 1356 }, { "epoch": 0.13206812652068126, "grad_norm": 1.395115321325138, "learning_rate": 9.729624660537324e-06, "loss": 0.3718, "step": 1357 }, { "epoch": 0.1321654501216545, "grad_norm": 1.3441869335850034, "learning_rate": 9.729113143472011e-06, "loss": 0.43, "step": 1358 }, { "epoch": 0.13226277372262774, "grad_norm": 1.31865416445236, "learning_rate": 9.72860115647182e-06, "loss": 0.296, "step": 1359 }, { "epoch": 0.13236009732360096, "grad_norm": 1.3998148863889133, "learning_rate": 9.728088699587623e-06, "loss": 0.2642, "step": 1360 }, { "epoch": 0.13245742092457422, "grad_norm": 1.5917388343760925, "learning_rate": 9.727575772870347e-06, "loss": 0.5999, "step": 1361 }, { "epoch": 0.13255474452554744, "grad_norm": 1.6062441992747731, "learning_rate": 9.727062376370962e-06, "loss": 0.6017, "step": 1362 }, { "epoch": 0.1326520681265207, "grad_norm": 1.756325054261889, "learning_rate": 9.72654851014048e-06, "loss": 0.5855, "step": 1363 }, { "epoch": 0.13274939172749392, "grad_norm": 1.5782112626775713, "learning_rate": 9.72603417422997e-06, "loss": 0.5643, "step": 1364 }, { "epoch": 0.13284671532846715, "grad_norm": 1.6280008631148617, "learning_rate": 9.725519368690539e-06, "loss": 0.3918, "step": 1365 }, { "epoch": 0.1329440389294404, "grad_norm": 1.731476294535625, "learning_rate": 9.725004093573343e-06, "loss": 0.6909, "step": 1366 }, { "epoch": 0.13304136253041363, "grad_norm": 1.7012591859680217, "learning_rate": 9.724488348929587e-06, "loss": 0.3206, "step": 1367 }, { "epoch": 0.13313868613138685, "grad_norm": 1.5539166250213363, "learning_rate": 9.723972134810519e-06, "loss": 0.3735, "step": 1368 }, { "epoch": 0.1332360097323601, "grad_norm": 1.2431527472113675, "learning_rate": 9.723455451267436e-06, "loss": 0.4023, "step": 1369 }, { "epoch": 0.13333333333333333, "grad_norm": 1.4147928785913308, "learning_rate": 9.722938298351682e-06, "loss": 0.4501, "step": 1370 }, { "epoch": 0.13343065693430656, "grad_norm": 1.5209109752466956, "learning_rate": 9.722420676114646e-06, "loss": 0.4504, "step": 1371 }, { "epoch": 0.1335279805352798, "grad_norm": 1.6031977794999224, "learning_rate": 9.721902584607766e-06, "loss": 0.4036, "step": 1372 }, { "epoch": 0.13362530413625304, "grad_norm": 1.3752266957066934, "learning_rate": 9.721384023882524e-06, "loss": 0.4008, "step": 1373 }, { "epoch": 0.1337226277372263, "grad_norm": 1.2289957585024915, "learning_rate": 9.720864993990448e-06, "loss": 0.3214, "step": 1374 }, { "epoch": 0.13381995133819952, "grad_norm": 1.5334770200964671, "learning_rate": 9.720345494983117e-06, "loss": 0.4103, "step": 1375 }, { "epoch": 0.13391727493917274, "grad_norm": 1.4428318489533865, "learning_rate": 9.719825526912152e-06, "loss": 0.4314, "step": 1376 }, { "epoch": 0.134014598540146, "grad_norm": 1.6794168653476527, "learning_rate": 9.719305089829224e-06, "loss": 0.6027, "step": 1377 }, { "epoch": 0.13411192214111922, "grad_norm": 1.4695816931820398, "learning_rate": 9.718784183786048e-06, "loss": 0.5337, "step": 1378 }, { "epoch": 0.13420924574209245, "grad_norm": 1.428180254445363, "learning_rate": 9.718262808834386e-06, "loss": 0.3636, "step": 1379 }, { "epoch": 0.1343065693430657, "grad_norm": 1.4446624118640763, "learning_rate": 9.717740965026051e-06, "loss": 0.4213, "step": 1380 }, { "epoch": 0.13440389294403893, "grad_norm": 1.0145899854020284, "learning_rate": 9.717218652412896e-06, "loss": 0.292, "step": 1381 }, { "epoch": 0.13450121654501215, "grad_norm": 1.4589445831305994, "learning_rate": 9.716695871046824e-06, "loss": 0.4787, "step": 1382 }, { "epoch": 0.1345985401459854, "grad_norm": 1.5462880417778382, "learning_rate": 9.716172620979783e-06, "loss": 0.4716, "step": 1383 }, { "epoch": 0.13469586374695863, "grad_norm": 1.5411526965931406, "learning_rate": 9.71564890226377e-06, "loss": 0.5311, "step": 1384 }, { "epoch": 0.1347931873479319, "grad_norm": 1.1928924795974905, "learning_rate": 9.71512471495083e-06, "loss": 0.2724, "step": 1385 }, { "epoch": 0.1348905109489051, "grad_norm": 1.3201585939530793, "learning_rate": 9.714600059093045e-06, "loss": 0.2987, "step": 1386 }, { "epoch": 0.13498783454987834, "grad_norm": 1.5687746327202647, "learning_rate": 9.714074934742556e-06, "loss": 0.363, "step": 1387 }, { "epoch": 0.1350851581508516, "grad_norm": 1.5338932500845779, "learning_rate": 9.713549341951543e-06, "loss": 0.5661, "step": 1388 }, { "epoch": 0.13518248175182482, "grad_norm": 1.1601153536694444, "learning_rate": 9.713023280772236e-06, "loss": 0.3079, "step": 1389 }, { "epoch": 0.13527980535279804, "grad_norm": 1.3983637614495477, "learning_rate": 9.712496751256907e-06, "loss": 0.4741, "step": 1390 }, { "epoch": 0.1353771289537713, "grad_norm": 1.2378967843544995, "learning_rate": 9.71196975345788e-06, "loss": 0.3467, "step": 1391 }, { "epoch": 0.13547445255474452, "grad_norm": 1.3128641622430697, "learning_rate": 9.711442287427523e-06, "loss": 0.413, "step": 1392 }, { "epoch": 0.13557177615571775, "grad_norm": 1.6638151172989781, "learning_rate": 9.71091435321825e-06, "loss": 0.4844, "step": 1393 }, { "epoch": 0.135669099756691, "grad_norm": 1.5023430961651105, "learning_rate": 9.710385950882522e-06, "loss": 0.3639, "step": 1394 }, { "epoch": 0.13576642335766423, "grad_norm": 1.3286069107884302, "learning_rate": 9.709857080472847e-06, "loss": 0.4055, "step": 1395 }, { "epoch": 0.13586374695863748, "grad_norm": 1.2934746343236392, "learning_rate": 9.709327742041776e-06, "loss": 0.2837, "step": 1396 }, { "epoch": 0.1359610705596107, "grad_norm": 1.698077360010743, "learning_rate": 9.708797935641915e-06, "loss": 0.3687, "step": 1397 }, { "epoch": 0.13605839416058393, "grad_norm": 1.412271661785088, "learning_rate": 9.70826766132591e-06, "loss": 0.3577, "step": 1398 }, { "epoch": 0.1361557177615572, "grad_norm": 1.5421055950766074, "learning_rate": 9.707736919146453e-06, "loss": 0.5394, "step": 1399 }, { "epoch": 0.1362530413625304, "grad_norm": 2.554386599490806, "learning_rate": 9.707205709156285e-06, "loss": 0.212, "step": 1400 }, { "epoch": 0.13635036496350364, "grad_norm": 1.7436805109650844, "learning_rate": 9.70667403140819e-06, "loss": 0.6893, "step": 1401 }, { "epoch": 0.1364476885644769, "grad_norm": 1.613527884115612, "learning_rate": 9.706141885955006e-06, "loss": 0.42, "step": 1402 }, { "epoch": 0.13654501216545012, "grad_norm": 1.711619341430359, "learning_rate": 9.70560927284961e-06, "loss": 0.7025, "step": 1403 }, { "epoch": 0.13664233576642335, "grad_norm": 1.5376532439489434, "learning_rate": 9.705076192144927e-06, "loss": 0.5201, "step": 1404 }, { "epoch": 0.1367396593673966, "grad_norm": 1.492510855426001, "learning_rate": 9.704542643893931e-06, "loss": 0.4281, "step": 1405 }, { "epoch": 0.13683698296836982, "grad_norm": 1.5678573317920237, "learning_rate": 9.704008628149641e-06, "loss": 0.506, "step": 1406 }, { "epoch": 0.13693430656934308, "grad_norm": 1.3237691920747017, "learning_rate": 9.703474144965123e-06, "loss": 0.4114, "step": 1407 }, { "epoch": 0.1370316301703163, "grad_norm": 1.4134135574988251, "learning_rate": 9.702939194393489e-06, "loss": 0.3806, "step": 1408 }, { "epoch": 0.13712895377128953, "grad_norm": 1.5544258549266206, "learning_rate": 9.702403776487895e-06, "loss": 0.4863, "step": 1409 }, { "epoch": 0.13722627737226278, "grad_norm": 1.3619063912879554, "learning_rate": 9.701867891301548e-06, "loss": 0.3692, "step": 1410 }, { "epoch": 0.137323600973236, "grad_norm": 1.5146665393724075, "learning_rate": 9.701331538887699e-06, "loss": 0.3311, "step": 1411 }, { "epoch": 0.13742092457420924, "grad_norm": 1.5674647990142176, "learning_rate": 9.700794719299644e-06, "loss": 0.5292, "step": 1412 }, { "epoch": 0.1375182481751825, "grad_norm": 1.4711236643775818, "learning_rate": 9.700257432590729e-06, "loss": 0.466, "step": 1413 }, { "epoch": 0.13761557177615572, "grad_norm": 1.4410106250389758, "learning_rate": 9.699719678814345e-06, "loss": 0.3276, "step": 1414 }, { "epoch": 0.13771289537712894, "grad_norm": 1.652937978394441, "learning_rate": 9.699181458023927e-06, "loss": 0.5391, "step": 1415 }, { "epoch": 0.1378102189781022, "grad_norm": 1.7285587973510355, "learning_rate": 9.698642770272959e-06, "loss": 0.5707, "step": 1416 }, { "epoch": 0.13790754257907542, "grad_norm": 1.325058257423692, "learning_rate": 9.698103615614972e-06, "loss": 0.3429, "step": 1417 }, { "epoch": 0.13800486618004867, "grad_norm": 1.5653351048996198, "learning_rate": 9.69756399410354e-06, "loss": 0.4132, "step": 1418 }, { "epoch": 0.1381021897810219, "grad_norm": 1.603805088396393, "learning_rate": 9.697023905792287e-06, "loss": 0.4983, "step": 1419 }, { "epoch": 0.13819951338199513, "grad_norm": 1.5052443063346659, "learning_rate": 9.69648335073488e-06, "loss": 0.2713, "step": 1420 }, { "epoch": 0.13829683698296838, "grad_norm": 1.30196768692164, "learning_rate": 9.695942328985037e-06, "loss": 0.27, "step": 1421 }, { "epoch": 0.1383941605839416, "grad_norm": 1.1542739478608208, "learning_rate": 9.695400840596519e-06, "loss": 0.3309, "step": 1422 }, { "epoch": 0.13849148418491483, "grad_norm": 1.1029138054910885, "learning_rate": 9.694858885623132e-06, "loss": 0.3262, "step": 1423 }, { "epoch": 0.13858880778588809, "grad_norm": 1.581389120261872, "learning_rate": 9.694316464118732e-06, "loss": 0.4663, "step": 1424 }, { "epoch": 0.1386861313868613, "grad_norm": 1.2966198038166061, "learning_rate": 9.69377357613722e-06, "loss": 0.336, "step": 1425 }, { "epoch": 0.13878345498783454, "grad_norm": 1.505634533514273, "learning_rate": 9.693230221732544e-06, "loss": 0.4269, "step": 1426 }, { "epoch": 0.1388807785888078, "grad_norm": 1.274453115047599, "learning_rate": 9.692686400958695e-06, "loss": 0.3978, "step": 1427 }, { "epoch": 0.13897810218978102, "grad_norm": 1.2126154933077449, "learning_rate": 9.692142113869714e-06, "loss": 0.2754, "step": 1428 }, { "epoch": 0.13907542579075427, "grad_norm": 1.4884313472642259, "learning_rate": 9.691597360519686e-06, "loss": 0.4661, "step": 1429 }, { "epoch": 0.1391727493917275, "grad_norm": 1.5680101511782372, "learning_rate": 9.691052140962747e-06, "loss": 0.4237, "step": 1430 }, { "epoch": 0.13927007299270072, "grad_norm": 1.325640699841282, "learning_rate": 9.690506455253073e-06, "loss": 0.3988, "step": 1431 }, { "epoch": 0.13936739659367398, "grad_norm": 1.3107002270910884, "learning_rate": 9.689960303444887e-06, "loss": 0.4268, "step": 1432 }, { "epoch": 0.1394647201946472, "grad_norm": 1.9246823036308274, "learning_rate": 9.689413685592465e-06, "loss": 0.3733, "step": 1433 }, { "epoch": 0.13956204379562043, "grad_norm": 1.3731854343094059, "learning_rate": 9.688866601750122e-06, "loss": 0.4215, "step": 1434 }, { "epoch": 0.13965936739659368, "grad_norm": 1.368964734934982, "learning_rate": 9.688319051972224e-06, "loss": 0.4697, "step": 1435 }, { "epoch": 0.1397566909975669, "grad_norm": 1.3451140821212522, "learning_rate": 9.687771036313178e-06, "loss": 0.3741, "step": 1436 }, { "epoch": 0.13985401459854013, "grad_norm": 1.5372748667563303, "learning_rate": 9.687222554827444e-06, "loss": 0.4199, "step": 1437 }, { "epoch": 0.1399513381995134, "grad_norm": 1.1780522614950486, "learning_rate": 9.686673607569526e-06, "loss": 0.3602, "step": 1438 }, { "epoch": 0.1400486618004866, "grad_norm": 1.20778383169165, "learning_rate": 9.686124194593967e-06, "loss": 0.23, "step": 1439 }, { "epoch": 0.14014598540145987, "grad_norm": 1.6760972087501165, "learning_rate": 9.685574315955368e-06, "loss": 0.5089, "step": 1440 }, { "epoch": 0.1402433090024331, "grad_norm": 1.7963497555189056, "learning_rate": 9.68502397170837e-06, "loss": 0.3932, "step": 1441 }, { "epoch": 0.14034063260340632, "grad_norm": 1.401968265514402, "learning_rate": 9.68447316190766e-06, "loss": 0.4272, "step": 1442 }, { "epoch": 0.14043795620437957, "grad_norm": 1.1461895591250986, "learning_rate": 9.683921886607973e-06, "loss": 0.3003, "step": 1443 }, { "epoch": 0.1405352798053528, "grad_norm": 1.8257595963636586, "learning_rate": 9.683370145864089e-06, "loss": 0.4454, "step": 1444 }, { "epoch": 0.14063260340632602, "grad_norm": 1.3483599166387192, "learning_rate": 9.682817939730833e-06, "loss": 0.3708, "step": 1445 }, { "epoch": 0.14072992700729928, "grad_norm": 1.4560700792487955, "learning_rate": 9.682265268263083e-06, "loss": 0.4321, "step": 1446 }, { "epoch": 0.1408272506082725, "grad_norm": 1.4364952224667933, "learning_rate": 9.681712131515753e-06, "loss": 0.3812, "step": 1447 }, { "epoch": 0.14092457420924573, "grad_norm": 1.6808986821455574, "learning_rate": 9.681158529543812e-06, "loss": 0.3939, "step": 1448 }, { "epoch": 0.14102189781021898, "grad_norm": 1.5327313322922438, "learning_rate": 9.68060446240227e-06, "loss": 0.3617, "step": 1449 }, { "epoch": 0.1411192214111922, "grad_norm": 1.9055650449775412, "learning_rate": 9.680049930146186e-06, "loss": 0.4984, "step": 1450 }, { "epoch": 0.14121654501216546, "grad_norm": 1.8971706606162055, "learning_rate": 9.679494932830664e-06, "loss": 0.4196, "step": 1451 }, { "epoch": 0.1413138686131387, "grad_norm": 1.7337796675846617, "learning_rate": 9.678939470510856e-06, "loss": 0.4282, "step": 1452 }, { "epoch": 0.1414111922141119, "grad_norm": 1.6436762455975924, "learning_rate": 9.678383543241954e-06, "loss": 0.425, "step": 1453 }, { "epoch": 0.14150851581508517, "grad_norm": 1.3304471527694197, "learning_rate": 9.677827151079205e-06, "loss": 0.346, "step": 1454 }, { "epoch": 0.1416058394160584, "grad_norm": 1.3162532004293022, "learning_rate": 9.677270294077895e-06, "loss": 0.4492, "step": 1455 }, { "epoch": 0.14170316301703162, "grad_norm": 1.2299075830057253, "learning_rate": 9.676712972293363e-06, "loss": 0.3525, "step": 1456 }, { "epoch": 0.14180048661800487, "grad_norm": 1.7174455721253266, "learning_rate": 9.676155185780989e-06, "loss": 0.763, "step": 1457 }, { "epoch": 0.1418978102189781, "grad_norm": 0.9624475539149472, "learning_rate": 9.675596934596198e-06, "loss": 0.2234, "step": 1458 }, { "epoch": 0.14199513381995132, "grad_norm": 1.380722751360302, "learning_rate": 9.675038218794469e-06, "loss": 0.3539, "step": 1459 }, { "epoch": 0.14209245742092458, "grad_norm": 1.3595616004290971, "learning_rate": 9.674479038431314e-06, "loss": 0.4356, "step": 1460 }, { "epoch": 0.1421897810218978, "grad_norm": 1.2777542247997187, "learning_rate": 9.673919393562308e-06, "loss": 0.3233, "step": 1461 }, { "epoch": 0.14228710462287106, "grad_norm": 1.23752096524445, "learning_rate": 9.673359284243055e-06, "loss": 0.405, "step": 1462 }, { "epoch": 0.14238442822384428, "grad_norm": 1.4547729172095425, "learning_rate": 9.672798710529222e-06, "loss": 0.5356, "step": 1463 }, { "epoch": 0.1424817518248175, "grad_norm": 1.5976011084855026, "learning_rate": 9.672237672476506e-06, "loss": 0.571, "step": 1464 }, { "epoch": 0.14257907542579076, "grad_norm": 1.4454139467669962, "learning_rate": 9.67167617014066e-06, "loss": 0.4556, "step": 1465 }, { "epoch": 0.142676399026764, "grad_norm": 1.5296734849172828, "learning_rate": 9.671114203577485e-06, "loss": 0.5791, "step": 1466 }, { "epoch": 0.14277372262773722, "grad_norm": 1.0140913901893902, "learning_rate": 9.670551772842818e-06, "loss": 0.2732, "step": 1467 }, { "epoch": 0.14287104622871047, "grad_norm": 1.5600773149062541, "learning_rate": 9.669988877992551e-06, "loss": 0.3902, "step": 1468 }, { "epoch": 0.1429683698296837, "grad_norm": 1.4767158872669255, "learning_rate": 9.66942551908262e-06, "loss": 0.5531, "step": 1469 }, { "epoch": 0.14306569343065692, "grad_norm": 1.1134570066684917, "learning_rate": 9.668861696169003e-06, "loss": 0.278, "step": 1470 }, { "epoch": 0.14316301703163017, "grad_norm": 0.9776488708344422, "learning_rate": 9.66829740930773e-06, "loss": 0.231, "step": 1471 }, { "epoch": 0.1432603406326034, "grad_norm": 1.4647496714581032, "learning_rate": 9.667732658554875e-06, "loss": 0.485, "step": 1472 }, { "epoch": 0.14335766423357665, "grad_norm": 1.2234301570511203, "learning_rate": 9.667167443966557e-06, "loss": 0.3944, "step": 1473 }, { "epoch": 0.14345498783454988, "grad_norm": 1.3655487702696618, "learning_rate": 9.66660176559894e-06, "loss": 0.3989, "step": 1474 }, { "epoch": 0.1435523114355231, "grad_norm": 1.4690108372007447, "learning_rate": 9.666035623508238e-06, "loss": 0.4311, "step": 1475 }, { "epoch": 0.14364963503649636, "grad_norm": 1.1910374305057687, "learning_rate": 9.665469017750708e-06, "loss": 0.3002, "step": 1476 }, { "epoch": 0.14374695863746959, "grad_norm": 1.678176413091249, "learning_rate": 9.664901948382654e-06, "loss": 0.6143, "step": 1477 }, { "epoch": 0.1438442822384428, "grad_norm": 1.817046546881487, "learning_rate": 9.664334415460426e-06, "loss": 0.7811, "step": 1478 }, { "epoch": 0.14394160583941606, "grad_norm": 1.4955026439922687, "learning_rate": 9.663766419040422e-06, "loss": 0.411, "step": 1479 }, { "epoch": 0.1440389294403893, "grad_norm": 1.4198677231066263, "learning_rate": 9.66319795917908e-06, "loss": 0.4245, "step": 1480 }, { "epoch": 0.14413625304136254, "grad_norm": 1.5199876898969789, "learning_rate": 9.662629035932892e-06, "loss": 0.438, "step": 1481 }, { "epoch": 0.14423357664233577, "grad_norm": 1.4859771113526168, "learning_rate": 9.662059649358388e-06, "loss": 0.3949, "step": 1482 }, { "epoch": 0.144330900243309, "grad_norm": 1.5386966328542977, "learning_rate": 9.661489799512155e-06, "loss": 0.4679, "step": 1483 }, { "epoch": 0.14442822384428225, "grad_norm": 1.2872766782537612, "learning_rate": 9.660919486450813e-06, "loss": 0.2624, "step": 1484 }, { "epoch": 0.14452554744525548, "grad_norm": 1.3276179523832277, "learning_rate": 9.660348710231037e-06, "loss": 0.5476, "step": 1485 }, { "epoch": 0.1446228710462287, "grad_norm": 0.9490583621811937, "learning_rate": 9.659777470909547e-06, "loss": 0.2354, "step": 1486 }, { "epoch": 0.14472019464720196, "grad_norm": 1.3763558898436123, "learning_rate": 9.659205768543104e-06, "loss": 0.4327, "step": 1487 }, { "epoch": 0.14481751824817518, "grad_norm": 1.178366926128956, "learning_rate": 9.658633603188521e-06, "loss": 0.3839, "step": 1488 }, { "epoch": 0.1449148418491484, "grad_norm": 1.3255542333725456, "learning_rate": 9.658060974902653e-06, "loss": 0.3068, "step": 1489 }, { "epoch": 0.14501216545012166, "grad_norm": 1.5998345706772108, "learning_rate": 9.657487883742403e-06, "loss": 0.5432, "step": 1490 }, { "epoch": 0.1451094890510949, "grad_norm": 1.8804975658787435, "learning_rate": 9.656914329764718e-06, "loss": 0.5268, "step": 1491 }, { "epoch": 0.14520681265206814, "grad_norm": 1.5841269093835124, "learning_rate": 9.656340313026595e-06, "loss": 0.6304, "step": 1492 }, { "epoch": 0.14530413625304137, "grad_norm": 1.5832299483159056, "learning_rate": 9.655765833585072e-06, "loss": 0.4417, "step": 1493 }, { "epoch": 0.1454014598540146, "grad_norm": 1.2541361090062475, "learning_rate": 9.655190891497237e-06, "loss": 0.2956, "step": 1494 }, { "epoch": 0.14549878345498785, "grad_norm": 1.4549578520972333, "learning_rate": 9.654615486820223e-06, "loss": 0.5352, "step": 1495 }, { "epoch": 0.14559610705596107, "grad_norm": 1.4797996277102474, "learning_rate": 9.654039619611205e-06, "loss": 0.4915, "step": 1496 }, { "epoch": 0.1456934306569343, "grad_norm": 1.2281886698207842, "learning_rate": 9.65346328992741e-06, "loss": 0.1901, "step": 1497 }, { "epoch": 0.14579075425790755, "grad_norm": 1.4478972545758728, "learning_rate": 9.652886497826109e-06, "loss": 0.4142, "step": 1498 }, { "epoch": 0.14588807785888078, "grad_norm": 1.5883286963945868, "learning_rate": 9.652309243364614e-06, "loss": 0.3576, "step": 1499 }, { "epoch": 0.145985401459854, "grad_norm": 1.5369489845441549, "learning_rate": 9.651731526600293e-06, "loss": 0.5479, "step": 1500 }, { "epoch": 0.14608272506082726, "grad_norm": 1.5655077404950533, "learning_rate": 9.651153347590549e-06, "loss": 0.3464, "step": 1501 }, { "epoch": 0.14618004866180048, "grad_norm": 1.6426065013852054, "learning_rate": 9.65057470639284e-06, "loss": 0.5038, "step": 1502 }, { "epoch": 0.14627737226277374, "grad_norm": 1.8088684532537898, "learning_rate": 9.649995603064664e-06, "loss": 0.5731, "step": 1503 }, { "epoch": 0.14637469586374696, "grad_norm": 1.2493389766016731, "learning_rate": 9.649416037663564e-06, "loss": 0.3306, "step": 1504 }, { "epoch": 0.1464720194647202, "grad_norm": 1.5964615139072293, "learning_rate": 9.648836010247137e-06, "loss": 0.4169, "step": 1505 }, { "epoch": 0.14656934306569344, "grad_norm": 1.3925830899828215, "learning_rate": 9.648255520873018e-06, "loss": 0.3092, "step": 1506 }, { "epoch": 0.14666666666666667, "grad_norm": 1.4256860988690832, "learning_rate": 9.647674569598889e-06, "loss": 0.3201, "step": 1507 }, { "epoch": 0.1467639902676399, "grad_norm": 1.6614703553660697, "learning_rate": 9.647093156482483e-06, "loss": 0.6078, "step": 1508 }, { "epoch": 0.14686131386861315, "grad_norm": 1.4357097092225446, "learning_rate": 9.646511281581575e-06, "loss": 0.4004, "step": 1509 }, { "epoch": 0.14695863746958637, "grad_norm": 1.4562846462074024, "learning_rate": 9.645928944953981e-06, "loss": 0.4601, "step": 1510 }, { "epoch": 0.1470559610705596, "grad_norm": 1.3277232740610976, "learning_rate": 9.645346146657575e-06, "loss": 0.4015, "step": 1511 }, { "epoch": 0.14715328467153285, "grad_norm": 1.5964514332978577, "learning_rate": 9.644762886750267e-06, "loss": 0.4556, "step": 1512 }, { "epoch": 0.14725060827250608, "grad_norm": 1.4663379423625913, "learning_rate": 9.644179165290015e-06, "loss": 0.4353, "step": 1513 }, { "epoch": 0.14734793187347933, "grad_norm": 1.0949765548634744, "learning_rate": 9.643594982334826e-06, "loss": 0.2276, "step": 1514 }, { "epoch": 0.14744525547445256, "grad_norm": 1.563845779693575, "learning_rate": 9.643010337942749e-06, "loss": 0.6694, "step": 1515 }, { "epoch": 0.14754257907542578, "grad_norm": 1.024413538842663, "learning_rate": 9.642425232171881e-06, "loss": 0.3047, "step": 1516 }, { "epoch": 0.14763990267639904, "grad_norm": 1.712866405633365, "learning_rate": 9.641839665080363e-06, "loss": 0.6729, "step": 1517 }, { "epoch": 0.14773722627737226, "grad_norm": 1.4526258041869373, "learning_rate": 9.641253636726386e-06, "loss": 0.5037, "step": 1518 }, { "epoch": 0.1478345498783455, "grad_norm": 1.7375410582816389, "learning_rate": 9.640667147168182e-06, "loss": 0.6717, "step": 1519 }, { "epoch": 0.14793187347931874, "grad_norm": 1.736227335112512, "learning_rate": 9.640080196464032e-06, "loss": 0.6677, "step": 1520 }, { "epoch": 0.14802919708029197, "grad_norm": 1.5194007013329096, "learning_rate": 9.63949278467226e-06, "loss": 0.4288, "step": 1521 }, { "epoch": 0.1481265206812652, "grad_norm": 1.5063763039212688, "learning_rate": 9.638904911851237e-06, "loss": 0.4529, "step": 1522 }, { "epoch": 0.14822384428223845, "grad_norm": 1.6414876214129155, "learning_rate": 9.638316578059384e-06, "loss": 0.5482, "step": 1523 }, { "epoch": 0.14832116788321167, "grad_norm": 1.3122113228270877, "learning_rate": 9.63772778335516e-06, "loss": 0.3903, "step": 1524 }, { "epoch": 0.14841849148418493, "grad_norm": 1.6417051120393822, "learning_rate": 9.637138527797075e-06, "loss": 0.654, "step": 1525 }, { "epoch": 0.14851581508515815, "grad_norm": 1.2700043251684836, "learning_rate": 9.636548811443685e-06, "loss": 0.3338, "step": 1526 }, { "epoch": 0.14861313868613138, "grad_norm": 1.4124836827913858, "learning_rate": 9.63595863435359e-06, "loss": 0.3551, "step": 1527 }, { "epoch": 0.14871046228710463, "grad_norm": 1.3732601776051463, "learning_rate": 9.635367996585436e-06, "loss": 0.4212, "step": 1528 }, { "epoch": 0.14880778588807786, "grad_norm": 1.4785898006079692, "learning_rate": 9.634776898197916e-06, "loss": 0.416, "step": 1529 }, { "epoch": 0.14890510948905109, "grad_norm": 1.5889313350171215, "learning_rate": 9.634185339249766e-06, "loss": 0.5277, "step": 1530 }, { "epoch": 0.14900243309002434, "grad_norm": 1.7475817866143981, "learning_rate": 9.63359331979977e-06, "loss": 0.5202, "step": 1531 }, { "epoch": 0.14909975669099756, "grad_norm": 1.5329427899001755, "learning_rate": 9.633000839906758e-06, "loss": 0.4283, "step": 1532 }, { "epoch": 0.1491970802919708, "grad_norm": 1.3789605408265815, "learning_rate": 9.632407899629606e-06, "loss": 0.41, "step": 1533 }, { "epoch": 0.14929440389294404, "grad_norm": 1.725959361785896, "learning_rate": 9.631814499027233e-06, "loss": 0.6289, "step": 1534 }, { "epoch": 0.14939172749391727, "grad_norm": 1.5432692609357797, "learning_rate": 9.631220638158605e-06, "loss": 0.5, "step": 1535 }, { "epoch": 0.14948905109489052, "grad_norm": 1.6556108789068573, "learning_rate": 9.630626317082737e-06, "loss": 0.3819, "step": 1536 }, { "epoch": 0.14958637469586375, "grad_norm": 1.4498977098887442, "learning_rate": 9.630031535858686e-06, "loss": 0.4317, "step": 1537 }, { "epoch": 0.14968369829683698, "grad_norm": 1.1232180788321369, "learning_rate": 9.629436294545555e-06, "loss": 0.4004, "step": 1538 }, { "epoch": 0.14978102189781023, "grad_norm": 0.9949950497949807, "learning_rate": 9.628840593202494e-06, "loss": 0.2008, "step": 1539 }, { "epoch": 0.14987834549878346, "grad_norm": 1.431426278132333, "learning_rate": 9.628244431888699e-06, "loss": 0.3689, "step": 1540 }, { "epoch": 0.14997566909975668, "grad_norm": 1.575987397356523, "learning_rate": 9.627647810663407e-06, "loss": 0.5513, "step": 1541 }, { "epoch": 0.15007299270072993, "grad_norm": 1.5419042077794642, "learning_rate": 9.627050729585911e-06, "loss": 0.4614, "step": 1542 }, { "epoch": 0.15017031630170316, "grad_norm": 1.6695059275012083, "learning_rate": 9.626453188715539e-06, "loss": 0.5111, "step": 1543 }, { "epoch": 0.1502676399026764, "grad_norm": 1.5402255238707527, "learning_rate": 9.625855188111668e-06, "loss": 0.4209, "step": 1544 }, { "epoch": 0.15036496350364964, "grad_norm": 1.4645797288107798, "learning_rate": 9.625256727833726e-06, "loss": 0.4852, "step": 1545 }, { "epoch": 0.15046228710462287, "grad_norm": 2.0138530187225845, "learning_rate": 9.62465780794118e-06, "loss": 0.4272, "step": 1546 }, { "epoch": 0.15055961070559612, "grad_norm": 1.7939871096323345, "learning_rate": 9.624058428493543e-06, "loss": 0.3864, "step": 1547 }, { "epoch": 0.15065693430656935, "grad_norm": 1.5936734798237622, "learning_rate": 9.62345858955038e-06, "loss": 0.5951, "step": 1548 }, { "epoch": 0.15075425790754257, "grad_norm": 1.381736638575513, "learning_rate": 9.622858291171295e-06, "loss": 0.5078, "step": 1549 }, { "epoch": 0.15085158150851583, "grad_norm": 1.2680468052820635, "learning_rate": 9.622257533415939e-06, "loss": 0.3314, "step": 1550 }, { "epoch": 0.15094890510948905, "grad_norm": 1.5886359348363517, "learning_rate": 9.621656316344011e-06, "loss": 0.5985, "step": 1551 }, { "epoch": 0.15104622871046228, "grad_norm": 1.631001321245941, "learning_rate": 9.621054640015255e-06, "loss": 0.6297, "step": 1552 }, { "epoch": 0.15114355231143553, "grad_norm": 1.7004985330783402, "learning_rate": 9.62045250448946e-06, "loss": 0.5153, "step": 1553 }, { "epoch": 0.15124087591240876, "grad_norm": 1.414050528965644, "learning_rate": 9.619849909826457e-06, "loss": 0.2651, "step": 1554 }, { "epoch": 0.15133819951338198, "grad_norm": 1.3361950007111751, "learning_rate": 9.61924685608613e-06, "loss": 0.4179, "step": 1555 }, { "epoch": 0.15143552311435524, "grad_norm": 1.2305020766816175, "learning_rate": 9.618643343328404e-06, "loss": 0.3342, "step": 1556 }, { "epoch": 0.15153284671532846, "grad_norm": 1.3364057110807985, "learning_rate": 9.618039371613251e-06, "loss": 0.357, "step": 1557 }, { "epoch": 0.15163017031630172, "grad_norm": 0.9846564904659728, "learning_rate": 9.617434941000685e-06, "loss": 0.2278, "step": 1558 }, { "epoch": 0.15172749391727494, "grad_norm": 1.4874184978820846, "learning_rate": 9.616830051550772e-06, "loss": 0.4467, "step": 1559 }, { "epoch": 0.15182481751824817, "grad_norm": 1.79907754997464, "learning_rate": 9.61622470332362e-06, "loss": 0.5501, "step": 1560 }, { "epoch": 0.15192214111922142, "grad_norm": 1.2290536645357835, "learning_rate": 9.61561889637938e-06, "loss": 0.3149, "step": 1561 }, { "epoch": 0.15201946472019465, "grad_norm": 1.5048179340178087, "learning_rate": 9.615012630778254e-06, "loss": 0.5367, "step": 1562 }, { "epoch": 0.15211678832116787, "grad_norm": 1.387431259858161, "learning_rate": 9.614405906580486e-06, "loss": 0.4953, "step": 1563 }, { "epoch": 0.15221411192214113, "grad_norm": 1.4159610711473967, "learning_rate": 9.613798723846368e-06, "loss": 0.454, "step": 1564 }, { "epoch": 0.15231143552311435, "grad_norm": 1.2005509919566202, "learning_rate": 9.613191082636235e-06, "loss": 0.3945, "step": 1565 }, { "epoch": 0.15240875912408758, "grad_norm": 1.518451218591156, "learning_rate": 9.612582983010468e-06, "loss": 0.42, "step": 1566 }, { "epoch": 0.15250608272506083, "grad_norm": 1.2817177267697137, "learning_rate": 9.611974425029494e-06, "loss": 0.4119, "step": 1567 }, { "epoch": 0.15260340632603406, "grad_norm": 1.3182769071429, "learning_rate": 9.611365408753787e-06, "loss": 0.4301, "step": 1568 }, { "epoch": 0.1527007299270073, "grad_norm": 1.2668371165350867, "learning_rate": 9.610755934243864e-06, "loss": 0.3415, "step": 1569 }, { "epoch": 0.15279805352798054, "grad_norm": 1.334265705787435, "learning_rate": 9.610146001560293e-06, "loss": 0.325, "step": 1570 }, { "epoch": 0.15289537712895376, "grad_norm": 1.405628575667756, "learning_rate": 9.609535610763678e-06, "loss": 0.4, "step": 1571 }, { "epoch": 0.15299270072992702, "grad_norm": 1.5931859233666277, "learning_rate": 9.608924761914677e-06, "loss": 0.643, "step": 1572 }, { "epoch": 0.15309002433090024, "grad_norm": 1.323715339329346, "learning_rate": 9.608313455073989e-06, "loss": 0.4832, "step": 1573 }, { "epoch": 0.15318734793187347, "grad_norm": 1.1603088792271297, "learning_rate": 9.60770169030236e-06, "loss": 0.2684, "step": 1574 }, { "epoch": 0.15328467153284672, "grad_norm": 1.4578030666688024, "learning_rate": 9.607089467660581e-06, "loss": 0.4418, "step": 1575 }, { "epoch": 0.15338199513381995, "grad_norm": 1.2739086566679132, "learning_rate": 9.606476787209493e-06, "loss": 0.3847, "step": 1576 }, { "epoch": 0.15347931873479317, "grad_norm": 1.4031538044918876, "learning_rate": 9.605863649009973e-06, "loss": 0.3672, "step": 1577 }, { "epoch": 0.15357664233576643, "grad_norm": 1.473592849907526, "learning_rate": 9.605250053122951e-06, "loss": 0.3955, "step": 1578 }, { "epoch": 0.15367396593673965, "grad_norm": 1.6950520258208177, "learning_rate": 9.604635999609402e-06, "loss": 0.6923, "step": 1579 }, { "epoch": 0.1537712895377129, "grad_norm": 1.6074239515288835, "learning_rate": 9.604021488530342e-06, "loss": 0.4771, "step": 1580 }, { "epoch": 0.15386861313868613, "grad_norm": 1.5289432511411145, "learning_rate": 9.603406519946838e-06, "loss": 0.5881, "step": 1581 }, { "epoch": 0.15396593673965936, "grad_norm": 1.3225323677068181, "learning_rate": 9.602791093919998e-06, "loss": 0.3128, "step": 1582 }, { "epoch": 0.1540632603406326, "grad_norm": 1.467417498061456, "learning_rate": 9.60217521051098e-06, "loss": 0.545, "step": 1583 }, { "epoch": 0.15416058394160584, "grad_norm": 1.7568491012309082, "learning_rate": 9.60155886978098e-06, "loss": 0.7054, "step": 1584 }, { "epoch": 0.15425790754257906, "grad_norm": 1.5606257069028109, "learning_rate": 9.600942071791248e-06, "loss": 0.4329, "step": 1585 }, { "epoch": 0.15435523114355232, "grad_norm": 1.5727160833264413, "learning_rate": 9.600324816603074e-06, "loss": 0.6128, "step": 1586 }, { "epoch": 0.15445255474452554, "grad_norm": 1.3864503412663605, "learning_rate": 9.599707104277796e-06, "loss": 0.573, "step": 1587 }, { "epoch": 0.15454987834549877, "grad_norm": 1.4232761061254342, "learning_rate": 9.599088934876794e-06, "loss": 0.4136, "step": 1588 }, { "epoch": 0.15464720194647202, "grad_norm": 1.3399427727677786, "learning_rate": 9.598470308461499e-06, "loss": 0.257, "step": 1589 }, { "epoch": 0.15474452554744525, "grad_norm": 1.61635763649276, "learning_rate": 9.597851225093382e-06, "loss": 0.566, "step": 1590 }, { "epoch": 0.1548418491484185, "grad_norm": 1.6304164262097627, "learning_rate": 9.597231684833964e-06, "loss": 0.3673, "step": 1591 }, { "epoch": 0.15493917274939173, "grad_norm": 1.4592987498064005, "learning_rate": 9.596611687744807e-06, "loss": 0.5193, "step": 1592 }, { "epoch": 0.15503649635036496, "grad_norm": 1.4397292060019447, "learning_rate": 9.595991233887523e-06, "loss": 0.3236, "step": 1593 }, { "epoch": 0.1551338199513382, "grad_norm": 1.2246835494507005, "learning_rate": 9.595370323323763e-06, "loss": 0.2397, "step": 1594 }, { "epoch": 0.15523114355231143, "grad_norm": 1.530797619071646, "learning_rate": 9.59474895611523e-06, "loss": 0.4537, "step": 1595 }, { "epoch": 0.15532846715328466, "grad_norm": 0.9400393110536889, "learning_rate": 9.594127132323669e-06, "loss": 0.1899, "step": 1596 }, { "epoch": 0.15542579075425791, "grad_norm": 1.167634539806263, "learning_rate": 9.593504852010872e-06, "loss": 0.353, "step": 1597 }, { "epoch": 0.15552311435523114, "grad_norm": 1.6772160290018319, "learning_rate": 9.592882115238675e-06, "loss": 0.4194, "step": 1598 }, { "epoch": 0.15562043795620437, "grad_norm": 1.4391641520861267, "learning_rate": 9.592258922068958e-06, "loss": 0.4767, "step": 1599 }, { "epoch": 0.15571776155717762, "grad_norm": 1.544673007447179, "learning_rate": 9.591635272563648e-06, "loss": 0.3175, "step": 1600 }, { "epoch": 0.15581508515815085, "grad_norm": 1.4189512773822923, "learning_rate": 9.591011166784721e-06, "loss": 0.4834, "step": 1601 }, { "epoch": 0.1559124087591241, "grad_norm": 1.2414753149853184, "learning_rate": 9.590386604794191e-06, "loss": 0.3657, "step": 1602 }, { "epoch": 0.15600973236009733, "grad_norm": 1.0236785255419305, "learning_rate": 9.589761586654122e-06, "loss": 0.2011, "step": 1603 }, { "epoch": 0.15610705596107055, "grad_norm": 2.3461369884265357, "learning_rate": 9.589136112426625e-06, "loss": 0.4024, "step": 1604 }, { "epoch": 0.1562043795620438, "grad_norm": 1.2849479900774115, "learning_rate": 9.588510182173851e-06, "loss": 0.3527, "step": 1605 }, { "epoch": 0.15630170316301703, "grad_norm": 1.4153286655317308, "learning_rate": 9.587883795958001e-06, "loss": 0.4149, "step": 1606 }, { "epoch": 0.15639902676399026, "grad_norm": 1.6599870662874754, "learning_rate": 9.587256953841317e-06, "loss": 0.6479, "step": 1607 }, { "epoch": 0.1564963503649635, "grad_norm": 1.6670860080877101, "learning_rate": 9.58662965588609e-06, "loss": 0.5825, "step": 1608 }, { "epoch": 0.15659367396593674, "grad_norm": 1.7776280437765584, "learning_rate": 9.586001902154655e-06, "loss": 0.5798, "step": 1609 }, { "epoch": 0.15669099756690996, "grad_norm": 1.5456297515043347, "learning_rate": 9.585373692709391e-06, "loss": 0.4583, "step": 1610 }, { "epoch": 0.15678832116788322, "grad_norm": 1.8806083091738082, "learning_rate": 9.584745027612728e-06, "loss": 0.4736, "step": 1611 }, { "epoch": 0.15688564476885644, "grad_norm": 1.4790926453601037, "learning_rate": 9.584115906927131e-06, "loss": 0.4172, "step": 1612 }, { "epoch": 0.1569829683698297, "grad_norm": 3.3021500316987633, "learning_rate": 9.58348633071512e-06, "loss": 0.472, "step": 1613 }, { "epoch": 0.15708029197080292, "grad_norm": 1.860435632122749, "learning_rate": 9.582856299039253e-06, "loss": 0.4743, "step": 1614 }, { "epoch": 0.15717761557177615, "grad_norm": 1.7557070181222967, "learning_rate": 9.58222581196214e-06, "loss": 0.2907, "step": 1615 }, { "epoch": 0.1572749391727494, "grad_norm": 1.5588238003780286, "learning_rate": 9.581594869546433e-06, "loss": 0.3803, "step": 1616 }, { "epoch": 0.15737226277372263, "grad_norm": 1.5265824940366777, "learning_rate": 9.580963471854825e-06, "loss": 0.3163, "step": 1617 }, { "epoch": 0.15746958637469585, "grad_norm": 1.5425233608560427, "learning_rate": 9.580331618950063e-06, "loss": 0.3884, "step": 1618 }, { "epoch": 0.1575669099756691, "grad_norm": 1.4123635386488018, "learning_rate": 9.579699310894932e-06, "loss": 0.382, "step": 1619 }, { "epoch": 0.15766423357664233, "grad_norm": 1.578019469103596, "learning_rate": 9.579066547752266e-06, "loss": 0.4293, "step": 1620 }, { "epoch": 0.15776155717761559, "grad_norm": 1.6566990657429592, "learning_rate": 9.578433329584943e-06, "loss": 0.2878, "step": 1621 }, { "epoch": 0.1578588807785888, "grad_norm": 1.5290043771605026, "learning_rate": 9.577799656455886e-06, "loss": 0.4483, "step": 1622 }, { "epoch": 0.15795620437956204, "grad_norm": 1.7268752423292135, "learning_rate": 9.577165528428063e-06, "loss": 0.4805, "step": 1623 }, { "epoch": 0.1580535279805353, "grad_norm": 1.3495189675110832, "learning_rate": 9.576530945564488e-06, "loss": 0.3161, "step": 1624 }, { "epoch": 0.15815085158150852, "grad_norm": 1.4763829359235794, "learning_rate": 9.575895907928218e-06, "loss": 0.4825, "step": 1625 }, { "epoch": 0.15824817518248174, "grad_norm": 1.686991367686583, "learning_rate": 9.575260415582362e-06, "loss": 0.3016, "step": 1626 }, { "epoch": 0.158345498783455, "grad_norm": 1.3390220591470878, "learning_rate": 9.574624468590065e-06, "loss": 0.4523, "step": 1627 }, { "epoch": 0.15844282238442822, "grad_norm": 1.8698808087393168, "learning_rate": 9.573988067014523e-06, "loss": 0.5203, "step": 1628 }, { "epoch": 0.15854014598540145, "grad_norm": 1.4032165021732874, "learning_rate": 9.573351210918976e-06, "loss": 0.3678, "step": 1629 }, { "epoch": 0.1586374695863747, "grad_norm": 1.4017015011859046, "learning_rate": 9.572713900366707e-06, "loss": 0.2798, "step": 1630 }, { "epoch": 0.15873479318734793, "grad_norm": 1.4441030854971395, "learning_rate": 9.572076135421048e-06, "loss": 0.3514, "step": 1631 }, { "epoch": 0.15883211678832118, "grad_norm": 1.3629792761623065, "learning_rate": 9.571437916145373e-06, "loss": 0.4604, "step": 1632 }, { "epoch": 0.1589294403892944, "grad_norm": 1.376972344446985, "learning_rate": 9.570799242603101e-06, "loss": 0.4603, "step": 1633 }, { "epoch": 0.15902676399026763, "grad_norm": 1.5637421057827365, "learning_rate": 9.5701601148577e-06, "loss": 0.5575, "step": 1634 }, { "epoch": 0.1591240875912409, "grad_norm": 1.4338457681188446, "learning_rate": 9.56952053297268e-06, "loss": 0.532, "step": 1635 }, { "epoch": 0.1592214111922141, "grad_norm": 1.4858651962900338, "learning_rate": 9.568880497011597e-06, "loss": 0.4951, "step": 1636 }, { "epoch": 0.15931873479318734, "grad_norm": 1.543423201839799, "learning_rate": 9.568240007038048e-06, "loss": 0.5278, "step": 1637 }, { "epoch": 0.1594160583941606, "grad_norm": 1.408319688012345, "learning_rate": 9.567599063115683e-06, "loss": 0.4474, "step": 1638 }, { "epoch": 0.15951338199513382, "grad_norm": 1.2680346779127702, "learning_rate": 9.566957665308192e-06, "loss": 0.3351, "step": 1639 }, { "epoch": 0.15961070559610704, "grad_norm": 1.6277797838197976, "learning_rate": 9.56631581367931e-06, "loss": 0.3966, "step": 1640 }, { "epoch": 0.1597080291970803, "grad_norm": 1.5248977314161354, "learning_rate": 9.565673508292818e-06, "loss": 0.5211, "step": 1641 }, { "epoch": 0.15980535279805352, "grad_norm": 1.7164012466100764, "learning_rate": 9.565030749212546e-06, "loss": 0.5428, "step": 1642 }, { "epoch": 0.15990267639902678, "grad_norm": 1.6687081549609284, "learning_rate": 9.56438753650236e-06, "loss": 0.2936, "step": 1643 }, { "epoch": 0.16, "grad_norm": 1.5678110268585723, "learning_rate": 9.56374387022618e-06, "loss": 0.5166, "step": 1644 }, { "epoch": 0.16009732360097323, "grad_norm": 1.6983019931785335, "learning_rate": 9.563099750447966e-06, "loss": 0.4822, "step": 1645 }, { "epoch": 0.16019464720194648, "grad_norm": 1.4431824530543444, "learning_rate": 9.562455177231726e-06, "loss": 0.3212, "step": 1646 }, { "epoch": 0.1602919708029197, "grad_norm": 3.712828208723791, "learning_rate": 9.56181015064151e-06, "loss": 0.4286, "step": 1647 }, { "epoch": 0.16038929440389293, "grad_norm": 1.4388083433357408, "learning_rate": 9.561164670741416e-06, "loss": 0.3757, "step": 1648 }, { "epoch": 0.1604866180048662, "grad_norm": 2.4878081586110117, "learning_rate": 9.560518737595586e-06, "loss": 0.3494, "step": 1649 }, { "epoch": 0.16058394160583941, "grad_norm": 2.3091262745384706, "learning_rate": 9.559872351268205e-06, "loss": 0.4607, "step": 1650 }, { "epoch": 0.16068126520681264, "grad_norm": 1.6632563827899045, "learning_rate": 9.559225511823504e-06, "loss": 0.5718, "step": 1651 }, { "epoch": 0.1607785888077859, "grad_norm": 1.6138862417611177, "learning_rate": 9.558578219325763e-06, "loss": 0.325, "step": 1652 }, { "epoch": 0.16087591240875912, "grad_norm": 1.1933317040764397, "learning_rate": 9.557930473839303e-06, "loss": 0.339, "step": 1653 }, { "epoch": 0.16097323600973237, "grad_norm": 0.9728312200944081, "learning_rate": 9.55728227542849e-06, "loss": 0.2395, "step": 1654 }, { "epoch": 0.1610705596107056, "grad_norm": 1.5521742092214053, "learning_rate": 9.556633624157735e-06, "loss": 0.4613, "step": 1655 }, { "epoch": 0.16116788321167883, "grad_norm": 1.639740187603822, "learning_rate": 9.555984520091497e-06, "loss": 0.5146, "step": 1656 }, { "epoch": 0.16126520681265208, "grad_norm": 1.5387772039120604, "learning_rate": 9.555334963294277e-06, "loss": 0.4879, "step": 1657 }, { "epoch": 0.1613625304136253, "grad_norm": 1.2788374913210725, "learning_rate": 9.554684953830622e-06, "loss": 0.2115, "step": 1658 }, { "epoch": 0.16145985401459853, "grad_norm": 1.2466060338770748, "learning_rate": 9.554034491765123e-06, "loss": 0.4057, "step": 1659 }, { "epoch": 0.16155717761557178, "grad_norm": 1.3626765355526065, "learning_rate": 9.553383577162418e-06, "loss": 0.3922, "step": 1660 }, { "epoch": 0.161654501216545, "grad_norm": 1.4993759287568524, "learning_rate": 9.552732210087188e-06, "loss": 0.5101, "step": 1661 }, { "epoch": 0.16175182481751824, "grad_norm": 1.4132678080310175, "learning_rate": 9.55208039060416e-06, "loss": 0.4098, "step": 1662 }, { "epoch": 0.1618491484184915, "grad_norm": 1.3072203759845393, "learning_rate": 9.551428118778105e-06, "loss": 0.4437, "step": 1663 }, { "epoch": 0.16194647201946472, "grad_norm": 1.4197615961970556, "learning_rate": 9.550775394673841e-06, "loss": 0.4855, "step": 1664 }, { "epoch": 0.16204379562043797, "grad_norm": 1.1443578178578404, "learning_rate": 9.550122218356228e-06, "loss": 0.2651, "step": 1665 }, { "epoch": 0.1621411192214112, "grad_norm": 1.6274953169982382, "learning_rate": 9.549468589890173e-06, "loss": 0.5702, "step": 1666 }, { "epoch": 0.16223844282238442, "grad_norm": 1.5542252970145625, "learning_rate": 9.548814509340631e-06, "loss": 0.3618, "step": 1667 }, { "epoch": 0.16233576642335767, "grad_norm": 1.5872588267319008, "learning_rate": 9.548159976772593e-06, "loss": 0.5261, "step": 1668 }, { "epoch": 0.1624330900243309, "grad_norm": 1.1735078752446053, "learning_rate": 9.547504992251102e-06, "loss": 0.2709, "step": 1669 }, { "epoch": 0.16253041362530413, "grad_norm": 1.8057871189139236, "learning_rate": 9.546849555841247e-06, "loss": 0.3383, "step": 1670 }, { "epoch": 0.16262773722627738, "grad_norm": 1.4181568031561294, "learning_rate": 9.546193667608155e-06, "loss": 0.4654, "step": 1671 }, { "epoch": 0.1627250608272506, "grad_norm": 1.3372190697374011, "learning_rate": 9.545537327617004e-06, "loss": 0.4098, "step": 1672 }, { "epoch": 0.16282238442822383, "grad_norm": 1.4054977948345526, "learning_rate": 9.544880535933015e-06, "loss": 0.488, "step": 1673 }, { "epoch": 0.16291970802919709, "grad_norm": 1.8103202340533562, "learning_rate": 9.544223292621456e-06, "loss": 0.2989, "step": 1674 }, { "epoch": 0.1630170316301703, "grad_norm": 1.4424657055300307, "learning_rate": 9.543565597747633e-06, "loss": 0.3545, "step": 1675 }, { "epoch": 0.16311435523114357, "grad_norm": 1.712897793310079, "learning_rate": 9.542907451376904e-06, "loss": 0.4372, "step": 1676 }, { "epoch": 0.1632116788321168, "grad_norm": 1.5856342495538354, "learning_rate": 9.542248853574669e-06, "loss": 0.3552, "step": 1677 }, { "epoch": 0.16330900243309002, "grad_norm": 1.6070757988154845, "learning_rate": 9.541589804406373e-06, "loss": 0.6297, "step": 1678 }, { "epoch": 0.16340632603406327, "grad_norm": 1.4030835423791206, "learning_rate": 9.540930303937508e-06, "loss": 0.5304, "step": 1679 }, { "epoch": 0.1635036496350365, "grad_norm": 1.1629420270697914, "learning_rate": 9.540270352233607e-06, "loss": 0.3196, "step": 1680 }, { "epoch": 0.16360097323600972, "grad_norm": 1.6438421767465334, "learning_rate": 9.53960994936025e-06, "loss": 0.5718, "step": 1681 }, { "epoch": 0.16369829683698298, "grad_norm": 1.4972655485667212, "learning_rate": 9.538949095383064e-06, "loss": 0.5411, "step": 1682 }, { "epoch": 0.1637956204379562, "grad_norm": 1.6855463092047138, "learning_rate": 9.538287790367715e-06, "loss": 0.4072, "step": 1683 }, { "epoch": 0.16389294403892943, "grad_norm": 1.3024464622228382, "learning_rate": 9.537626034379918e-06, "loss": 0.3779, "step": 1684 }, { "epoch": 0.16399026763990268, "grad_norm": 1.295189693137423, "learning_rate": 9.536963827485435e-06, "loss": 0.3687, "step": 1685 }, { "epoch": 0.1640875912408759, "grad_norm": 1.4535138830119652, "learning_rate": 9.536301169750068e-06, "loss": 0.4548, "step": 1686 }, { "epoch": 0.16418491484184916, "grad_norm": 1.199213729997, "learning_rate": 9.535638061239663e-06, "loss": 0.2053, "step": 1687 }, { "epoch": 0.1642822384428224, "grad_norm": 1.5567691993981325, "learning_rate": 9.534974502020117e-06, "loss": 0.4098, "step": 1688 }, { "epoch": 0.1643795620437956, "grad_norm": 1.5701473016338705, "learning_rate": 9.534310492157368e-06, "loss": 0.4663, "step": 1689 }, { "epoch": 0.16447688564476887, "grad_norm": 1.4652608455665965, "learning_rate": 9.533646031717398e-06, "loss": 0.423, "step": 1690 }, { "epoch": 0.1645742092457421, "grad_norm": 1.556818972222242, "learning_rate": 9.532981120766235e-06, "loss": 0.5823, "step": 1691 }, { "epoch": 0.16467153284671532, "grad_norm": 1.3176167070500389, "learning_rate": 9.532315759369953e-06, "loss": 0.3369, "step": 1692 }, { "epoch": 0.16476885644768857, "grad_norm": 1.710131590392248, "learning_rate": 9.531649947594668e-06, "loss": 0.6235, "step": 1693 }, { "epoch": 0.1648661800486618, "grad_norm": 1.316452070848038, "learning_rate": 9.53098368550654e-06, "loss": 0.2773, "step": 1694 }, { "epoch": 0.16496350364963502, "grad_norm": 1.3144552108952152, "learning_rate": 9.53031697317178e-06, "loss": 0.4008, "step": 1695 }, { "epoch": 0.16506082725060828, "grad_norm": 1.6242845867808264, "learning_rate": 9.529649810656638e-06, "loss": 0.4994, "step": 1696 }, { "epoch": 0.1651581508515815, "grad_norm": 1.285181340955318, "learning_rate": 9.52898219802741e-06, "loss": 0.3565, "step": 1697 }, { "epoch": 0.16525547445255476, "grad_norm": 1.5859120183692204, "learning_rate": 9.528314135350439e-06, "loss": 0.6057, "step": 1698 }, { "epoch": 0.16535279805352798, "grad_norm": 1.2413369391689792, "learning_rate": 9.527645622692105e-06, "loss": 0.2912, "step": 1699 }, { "epoch": 0.1654501216545012, "grad_norm": 1.5626898078072964, "learning_rate": 9.526976660118846e-06, "loss": 0.4912, "step": 1700 }, { "epoch": 0.16554744525547446, "grad_norm": 1.355302168314411, "learning_rate": 9.526307247697133e-06, "loss": 0.4066, "step": 1701 }, { "epoch": 0.1656447688564477, "grad_norm": 1.6754743388370108, "learning_rate": 9.525637385493485e-06, "loss": 0.4402, "step": 1702 }, { "epoch": 0.16574209245742091, "grad_norm": 1.4378330010865907, "learning_rate": 9.524967073574468e-06, "loss": 0.3896, "step": 1703 }, { "epoch": 0.16583941605839417, "grad_norm": 1.5562357645264613, "learning_rate": 9.524296312006696e-06, "loss": 0.7178, "step": 1704 }, { "epoch": 0.1659367396593674, "grad_norm": 1.4997676033555023, "learning_rate": 9.523625100856814e-06, "loss": 0.5203, "step": 1705 }, { "epoch": 0.16603406326034062, "grad_norm": 1.39039181243628, "learning_rate": 9.522953440191528e-06, "loss": 0.4804, "step": 1706 }, { "epoch": 0.16613138686131387, "grad_norm": 1.2594698773182105, "learning_rate": 9.522281330077579e-06, "loss": 0.31, "step": 1707 }, { "epoch": 0.1662287104622871, "grad_norm": 1.5394103920539104, "learning_rate": 9.521608770581751e-06, "loss": 0.4579, "step": 1708 }, { "epoch": 0.16632603406326035, "grad_norm": 1.4703967014570463, "learning_rate": 9.520935761770885e-06, "loss": 0.4732, "step": 1709 }, { "epoch": 0.16642335766423358, "grad_norm": 1.0444153315520046, "learning_rate": 9.520262303711851e-06, "loss": 0.2468, "step": 1710 }, { "epoch": 0.1665206812652068, "grad_norm": 1.4440019594110525, "learning_rate": 9.519588396471572e-06, "loss": 0.4979, "step": 1711 }, { "epoch": 0.16661800486618006, "grad_norm": 1.6467368949298022, "learning_rate": 9.518914040117018e-06, "loss": 0.603, "step": 1712 }, { "epoch": 0.16671532846715328, "grad_norm": 1.656027868957794, "learning_rate": 9.518239234715198e-06, "loss": 0.3534, "step": 1713 }, { "epoch": 0.1668126520681265, "grad_norm": 1.409360793352949, "learning_rate": 9.517563980333169e-06, "loss": 0.4442, "step": 1714 }, { "epoch": 0.16690997566909976, "grad_norm": 1.4429795690770129, "learning_rate": 9.51688827703803e-06, "loss": 0.4347, "step": 1715 }, { "epoch": 0.167007299270073, "grad_norm": 1.2256612199861667, "learning_rate": 9.516212124896926e-06, "loss": 0.3582, "step": 1716 }, { "epoch": 0.16710462287104622, "grad_norm": 1.340106815948813, "learning_rate": 9.515535523977047e-06, "loss": 0.4494, "step": 1717 }, { "epoch": 0.16720194647201947, "grad_norm": 1.8033632646616307, "learning_rate": 9.514858474345628e-06, "loss": 0.7254, "step": 1718 }, { "epoch": 0.1672992700729927, "grad_norm": 1.461471704742246, "learning_rate": 9.514180976069948e-06, "loss": 0.4431, "step": 1719 }, { "epoch": 0.16739659367396595, "grad_norm": 1.8149337871023152, "learning_rate": 9.513503029217329e-06, "loss": 0.6808, "step": 1720 }, { "epoch": 0.16749391727493917, "grad_norm": 1.4317488687976054, "learning_rate": 9.51282463385514e-06, "loss": 0.3969, "step": 1721 }, { "epoch": 0.1675912408759124, "grad_norm": 1.406660867644435, "learning_rate": 9.512145790050793e-06, "loss": 0.4466, "step": 1722 }, { "epoch": 0.16768856447688565, "grad_norm": 1.5087949092220858, "learning_rate": 9.511466497871747e-06, "loss": 0.3588, "step": 1723 }, { "epoch": 0.16778588807785888, "grad_norm": 1.3780878680496882, "learning_rate": 9.5107867573855e-06, "loss": 0.4136, "step": 1724 }, { "epoch": 0.1678832116788321, "grad_norm": 1.1785521443758606, "learning_rate": 9.510106568659601e-06, "loss": 0.3319, "step": 1725 }, { "epoch": 0.16798053527980536, "grad_norm": 1.4150065437408217, "learning_rate": 9.50942593176164e-06, "loss": 0.3619, "step": 1726 }, { "epoch": 0.16807785888077859, "grad_norm": 1.5810685607791577, "learning_rate": 9.508744846759254e-06, "loss": 0.5204, "step": 1727 }, { "epoch": 0.1681751824817518, "grad_norm": 1.5507123725258296, "learning_rate": 9.50806331372012e-06, "loss": 0.3017, "step": 1728 }, { "epoch": 0.16827250608272507, "grad_norm": 1.7448176899198176, "learning_rate": 9.507381332711963e-06, "loss": 0.6488, "step": 1729 }, { "epoch": 0.1683698296836983, "grad_norm": 2.0203041353812243, "learning_rate": 9.506698903802553e-06, "loss": 0.2868, "step": 1730 }, { "epoch": 0.16846715328467154, "grad_norm": 1.425557408986151, "learning_rate": 9.506016027059703e-06, "loss": 0.4181, "step": 1731 }, { "epoch": 0.16856447688564477, "grad_norm": 1.658389742609111, "learning_rate": 9.505332702551272e-06, "loss": 0.4834, "step": 1732 }, { "epoch": 0.168661800486618, "grad_norm": 1.6313220070332846, "learning_rate": 9.50464893034516e-06, "loss": 0.6351, "step": 1733 }, { "epoch": 0.16875912408759125, "grad_norm": 1.4860828412814417, "learning_rate": 9.503964710509314e-06, "loss": 0.384, "step": 1734 }, { "epoch": 0.16885644768856448, "grad_norm": 1.5665989326823084, "learning_rate": 9.503280043111729e-06, "loss": 0.5031, "step": 1735 }, { "epoch": 0.1689537712895377, "grad_norm": 1.2627591310970376, "learning_rate": 9.502594928220437e-06, "loss": 0.3557, "step": 1736 }, { "epoch": 0.16905109489051096, "grad_norm": 1.6101827723851228, "learning_rate": 9.50190936590352e-06, "loss": 0.3886, "step": 1737 }, { "epoch": 0.16914841849148418, "grad_norm": 1.190927027644026, "learning_rate": 9.5012233562291e-06, "loss": 0.3, "step": 1738 }, { "epoch": 0.1692457420924574, "grad_norm": 1.6452233677093766, "learning_rate": 9.50053689926535e-06, "loss": 0.5808, "step": 1739 }, { "epoch": 0.16934306569343066, "grad_norm": 1.607284224817037, "learning_rate": 9.499849995080482e-06, "loss": 0.5726, "step": 1740 }, { "epoch": 0.1694403892944039, "grad_norm": 1.360873175063302, "learning_rate": 9.499162643742754e-06, "loss": 0.3294, "step": 1741 }, { "epoch": 0.16953771289537714, "grad_norm": 1.6205396325650636, "learning_rate": 9.49847484532047e-06, "loss": 0.5496, "step": 1742 }, { "epoch": 0.16963503649635037, "grad_norm": 1.6677491090337848, "learning_rate": 9.497786599881973e-06, "loss": 0.5745, "step": 1743 }, { "epoch": 0.1697323600973236, "grad_norm": 1.4765151889225172, "learning_rate": 9.497097907495658e-06, "loss": 0.3552, "step": 1744 }, { "epoch": 0.16982968369829685, "grad_norm": 1.4991516257283077, "learning_rate": 9.496408768229962e-06, "loss": 0.6004, "step": 1745 }, { "epoch": 0.16992700729927007, "grad_norm": 1.394241003611109, "learning_rate": 9.49571918215336e-06, "loss": 0.4166, "step": 1746 }, { "epoch": 0.1700243309002433, "grad_norm": 1.2418310265706307, "learning_rate": 9.495029149334381e-06, "loss": 0.3754, "step": 1747 }, { "epoch": 0.17012165450121655, "grad_norm": 1.7344174079178016, "learning_rate": 9.494338669841592e-06, "loss": 0.6136, "step": 1748 }, { "epoch": 0.17021897810218978, "grad_norm": 1.689754745813109, "learning_rate": 9.493647743743605e-06, "loss": 0.3066, "step": 1749 }, { "epoch": 0.170316301703163, "grad_norm": 1.5986274434851808, "learning_rate": 9.492956371109083e-06, "loss": 0.6476, "step": 1750 }, { "epoch": 0.17041362530413626, "grad_norm": 1.3892856963539753, "learning_rate": 9.492264552006725e-06, "loss": 0.2438, "step": 1751 }, { "epoch": 0.17051094890510948, "grad_norm": 1.3744062095245357, "learning_rate": 9.491572286505275e-06, "loss": 0.4154, "step": 1752 }, { "epoch": 0.17060827250608274, "grad_norm": 1.3041989445373636, "learning_rate": 9.490879574673528e-06, "loss": 0.3603, "step": 1753 }, { "epoch": 0.17070559610705596, "grad_norm": 1.2198251236981021, "learning_rate": 9.490186416580317e-06, "loss": 0.3382, "step": 1754 }, { "epoch": 0.1708029197080292, "grad_norm": 1.0699077871285796, "learning_rate": 9.489492812294521e-06, "loss": 0.2805, "step": 1755 }, { "epoch": 0.17090024330900244, "grad_norm": 1.8289792925797566, "learning_rate": 9.488798761885064e-06, "loss": 0.2551, "step": 1756 }, { "epoch": 0.17099756690997567, "grad_norm": 1.5156970449411904, "learning_rate": 9.488104265420917e-06, "loss": 0.5468, "step": 1757 }, { "epoch": 0.1710948905109489, "grad_norm": 1.3669899498040559, "learning_rate": 9.487409322971089e-06, "loss": 0.4705, "step": 1758 }, { "epoch": 0.17119221411192215, "grad_norm": 1.4212977316967985, "learning_rate": 9.486713934604638e-06, "loss": 0.5259, "step": 1759 }, { "epoch": 0.17128953771289537, "grad_norm": 1.3256503218660822, "learning_rate": 9.486018100390668e-06, "loss": 0.3825, "step": 1760 }, { "epoch": 0.17138686131386863, "grad_norm": 1.3448672418414023, "learning_rate": 9.485321820398321e-06, "loss": 0.4984, "step": 1761 }, { "epoch": 0.17148418491484185, "grad_norm": 1.3293122762885854, "learning_rate": 9.484625094696788e-06, "loss": 0.4419, "step": 1762 }, { "epoch": 0.17158150851581508, "grad_norm": 1.5749728003681251, "learning_rate": 9.483927923355303e-06, "loss": 0.4512, "step": 1763 }, { "epoch": 0.17167883211678833, "grad_norm": 2.1875449039755, "learning_rate": 9.483230306443144e-06, "loss": 0.4606, "step": 1764 }, { "epoch": 0.17177615571776156, "grad_norm": 1.4675466599593059, "learning_rate": 9.482532244029632e-06, "loss": 0.5098, "step": 1765 }, { "epoch": 0.17187347931873478, "grad_norm": 1.4395657499189969, "learning_rate": 9.481833736184137e-06, "loss": 0.4196, "step": 1766 }, { "epoch": 0.17197080291970804, "grad_norm": 1.6202346179751734, "learning_rate": 9.48113478297607e-06, "loss": 0.4083, "step": 1767 }, { "epoch": 0.17206812652068126, "grad_norm": 1.943359375, "learning_rate": 9.480435384474884e-06, "loss": 0.3829, "step": 1768 }, { "epoch": 0.1721654501216545, "grad_norm": 1.3957800309361543, "learning_rate": 9.47973554075008e-06, "loss": 0.4776, "step": 1769 }, { "epoch": 0.17226277372262774, "grad_norm": 1.3277740014703983, "learning_rate": 9.479035251871202e-06, "loss": 0.2944, "step": 1770 }, { "epoch": 0.17236009732360097, "grad_norm": 1.5955109684829234, "learning_rate": 9.478334517907838e-06, "loss": 0.4713, "step": 1771 }, { "epoch": 0.17245742092457422, "grad_norm": 1.209763677864614, "learning_rate": 9.477633338929621e-06, "loss": 0.2925, "step": 1772 }, { "epoch": 0.17255474452554745, "grad_norm": 1.6082316661319236, "learning_rate": 9.476931715006225e-06, "loss": 0.6037, "step": 1773 }, { "epoch": 0.17265206812652067, "grad_norm": 1.5310145605828824, "learning_rate": 9.476229646207375e-06, "loss": 0.427, "step": 1774 }, { "epoch": 0.17274939172749393, "grad_norm": 1.5161322305327478, "learning_rate": 9.475527132602833e-06, "loss": 0.5765, "step": 1775 }, { "epoch": 0.17284671532846715, "grad_norm": 1.5515912532543141, "learning_rate": 9.47482417426241e-06, "loss": 0.4693, "step": 1776 }, { "epoch": 0.17294403892944038, "grad_norm": 1.273583152257964, "learning_rate": 9.474120771255956e-06, "loss": 0.401, "step": 1777 }, { "epoch": 0.17304136253041363, "grad_norm": 1.3058387108850102, "learning_rate": 9.473416923653373e-06, "loss": 0.4651, "step": 1778 }, { "epoch": 0.17313868613138686, "grad_norm": 1.4876685295483647, "learning_rate": 9.472712631524599e-06, "loss": 0.5423, "step": 1779 }, { "epoch": 0.17323600973236009, "grad_norm": 1.4134646674679987, "learning_rate": 9.472007894939624e-06, "loss": 0.448, "step": 1780 }, { "epoch": 0.17333333333333334, "grad_norm": 1.3805564537318322, "learning_rate": 9.471302713968473e-06, "loss": 0.2429, "step": 1781 }, { "epoch": 0.17343065693430657, "grad_norm": 1.4256414475552066, "learning_rate": 9.470597088681225e-06, "loss": 0.4821, "step": 1782 }, { "epoch": 0.17352798053527982, "grad_norm": 1.2857804565204727, "learning_rate": 9.469891019147996e-06, "loss": 0.3177, "step": 1783 }, { "epoch": 0.17362530413625304, "grad_norm": 1.7384422656290006, "learning_rate": 9.46918450543895e-06, "loss": 0.6144, "step": 1784 }, { "epoch": 0.17372262773722627, "grad_norm": 1.3733947226466707, "learning_rate": 9.46847754762429e-06, "loss": 0.3777, "step": 1785 }, { "epoch": 0.17381995133819952, "grad_norm": 1.090627736959876, "learning_rate": 9.467770145774271e-06, "loss": 0.307, "step": 1786 }, { "epoch": 0.17391727493917275, "grad_norm": 1.5306242617959314, "learning_rate": 9.467062299959187e-06, "loss": 0.4652, "step": 1787 }, { "epoch": 0.17401459854014598, "grad_norm": 1.6335244702718128, "learning_rate": 9.466354010249375e-06, "loss": 0.5127, "step": 1788 }, { "epoch": 0.17411192214111923, "grad_norm": 1.3582351114688258, "learning_rate": 9.465645276715221e-06, "loss": 0.4213, "step": 1789 }, { "epoch": 0.17420924574209246, "grad_norm": 1.4962342995542501, "learning_rate": 9.464936099427151e-06, "loss": 0.4327, "step": 1790 }, { "epoch": 0.17430656934306568, "grad_norm": 1.513533102257641, "learning_rate": 9.464226478455636e-06, "loss": 0.4527, "step": 1791 }, { "epoch": 0.17440389294403894, "grad_norm": 1.4174664240767785, "learning_rate": 9.463516413871193e-06, "loss": 0.4986, "step": 1792 }, { "epoch": 0.17450121654501216, "grad_norm": 1.283758777636687, "learning_rate": 9.46280590574438e-06, "loss": 0.4648, "step": 1793 }, { "epoch": 0.17459854014598541, "grad_norm": 1.3960565511895506, "learning_rate": 9.4620949541458e-06, "loss": 0.3587, "step": 1794 }, { "epoch": 0.17469586374695864, "grad_norm": 1.6199676647020385, "learning_rate": 9.461383559146104e-06, "loss": 0.5292, "step": 1795 }, { "epoch": 0.17479318734793187, "grad_norm": 1.5028051531717803, "learning_rate": 9.46067172081598e-06, "loss": 0.4903, "step": 1796 }, { "epoch": 0.17489051094890512, "grad_norm": 1.683063467822515, "learning_rate": 9.459959439226165e-06, "loss": 0.3106, "step": 1797 }, { "epoch": 0.17498783454987835, "grad_norm": 1.3296224342860092, "learning_rate": 9.459246714447439e-06, "loss": 0.409, "step": 1798 }, { "epoch": 0.17508515815085157, "grad_norm": 1.5847151231323486, "learning_rate": 9.458533546550628e-06, "loss": 0.4169, "step": 1799 }, { "epoch": 0.17518248175182483, "grad_norm": 1.495253204796384, "learning_rate": 9.457819935606596e-06, "loss": 0.3753, "step": 1800 }, { "epoch": 0.17527980535279805, "grad_norm": 1.4723876384358174, "learning_rate": 9.45710588168626e-06, "loss": 0.2437, "step": 1801 }, { "epoch": 0.17537712895377128, "grad_norm": 1.5610295815557715, "learning_rate": 9.45639138486057e-06, "loss": 0.5651, "step": 1802 }, { "epoch": 0.17547445255474453, "grad_norm": 1.5876154549734276, "learning_rate": 9.45567644520053e-06, "loss": 0.4835, "step": 1803 }, { "epoch": 0.17557177615571776, "grad_norm": 1.5619759252942187, "learning_rate": 9.454961062777181e-06, "loss": 0.3036, "step": 1804 }, { "epoch": 0.175669099756691, "grad_norm": 1.0144029160546408, "learning_rate": 9.454245237661617e-06, "loss": 0.219, "step": 1805 }, { "epoch": 0.17576642335766424, "grad_norm": 1.717922774563162, "learning_rate": 9.453528969924963e-06, "loss": 0.5388, "step": 1806 }, { "epoch": 0.17586374695863746, "grad_norm": 1.288743598100688, "learning_rate": 9.452812259638399e-06, "loss": 0.4171, "step": 1807 }, { "epoch": 0.17596107055961072, "grad_norm": 1.951279890184611, "learning_rate": 9.452095106873142e-06, "loss": 0.3823, "step": 1808 }, { "epoch": 0.17605839416058394, "grad_norm": 1.352467757455935, "learning_rate": 9.45137751170046e-06, "loss": 0.3137, "step": 1809 }, { "epoch": 0.17615571776155717, "grad_norm": 1.3883395327139227, "learning_rate": 9.450659474191658e-06, "loss": 0.4878, "step": 1810 }, { "epoch": 0.17625304136253042, "grad_norm": 1.5658708451700805, "learning_rate": 9.449940994418088e-06, "loss": 0.523, "step": 1811 }, { "epoch": 0.17635036496350365, "grad_norm": 1.215080164631292, "learning_rate": 9.449222072451147e-06, "loss": 0.3773, "step": 1812 }, { "epoch": 0.17644768856447687, "grad_norm": 1.524331324578441, "learning_rate": 9.448502708362273e-06, "loss": 0.539, "step": 1813 }, { "epoch": 0.17654501216545013, "grad_norm": 1.6985132616371517, "learning_rate": 9.447782902222951e-06, "loss": 0.6344, "step": 1814 }, { "epoch": 0.17664233576642335, "grad_norm": 1.394156226586294, "learning_rate": 9.447062654104708e-06, "loss": 0.4136, "step": 1815 }, { "epoch": 0.1767396593673966, "grad_norm": 1.0359913462457855, "learning_rate": 9.446341964079116e-06, "loss": 0.2471, "step": 1816 }, { "epoch": 0.17683698296836983, "grad_norm": 1.6379291001324041, "learning_rate": 9.44562083221779e-06, "loss": 0.4648, "step": 1817 }, { "epoch": 0.17693430656934306, "grad_norm": 1.0926982727654353, "learning_rate": 9.44489925859239e-06, "loss": 0.253, "step": 1818 }, { "epoch": 0.1770316301703163, "grad_norm": 1.3396314447206463, "learning_rate": 9.444177243274619e-06, "loss": 0.4053, "step": 1819 }, { "epoch": 0.17712895377128954, "grad_norm": 1.2170984864894128, "learning_rate": 9.44345478633622e-06, "loss": 0.3483, "step": 1820 }, { "epoch": 0.17722627737226276, "grad_norm": 1.9241463489982464, "learning_rate": 9.442731887848993e-06, "loss": 0.7875, "step": 1821 }, { "epoch": 0.17732360097323602, "grad_norm": 1.7367037011857493, "learning_rate": 9.442008547884765e-06, "loss": 0.5423, "step": 1822 }, { "epoch": 0.17742092457420924, "grad_norm": 1.7768925691501514, "learning_rate": 9.441284766515417e-06, "loss": 0.5332, "step": 1823 }, { "epoch": 0.17751824817518247, "grad_norm": 1.544872490519166, "learning_rate": 9.440560543812872e-06, "loss": 0.4797, "step": 1824 }, { "epoch": 0.17761557177615572, "grad_norm": 1.3959412272112985, "learning_rate": 9.439835879849097e-06, "loss": 0.2813, "step": 1825 }, { "epoch": 0.17771289537712895, "grad_norm": 1.4333698815114406, "learning_rate": 9.439110774696101e-06, "loss": 0.4623, "step": 1826 }, { "epoch": 0.1778102189781022, "grad_norm": 1.4483549520432324, "learning_rate": 9.43838522842594e-06, "loss": 0.3718, "step": 1827 }, { "epoch": 0.17790754257907543, "grad_norm": 1.1321375447475677, "learning_rate": 9.43765924111071e-06, "loss": 0.3035, "step": 1828 }, { "epoch": 0.17800486618004865, "grad_norm": 1.362326738732822, "learning_rate": 9.436932812822554e-06, "loss": 0.316, "step": 1829 }, { "epoch": 0.1781021897810219, "grad_norm": 1.460799021966237, "learning_rate": 9.436205943633656e-06, "loss": 0.3911, "step": 1830 }, { "epoch": 0.17819951338199513, "grad_norm": 1.5389161016090995, "learning_rate": 9.435478633616247e-06, "loss": 0.5521, "step": 1831 }, { "epoch": 0.17829683698296836, "grad_norm": 1.5219868331018827, "learning_rate": 9.4347508828426e-06, "loss": 0.5027, "step": 1832 }, { "epoch": 0.1783941605839416, "grad_norm": 1.245087028586955, "learning_rate": 9.434022691385034e-06, "loss": 0.2981, "step": 1833 }, { "epoch": 0.17849148418491484, "grad_norm": 1.4557548434245557, "learning_rate": 9.433294059315905e-06, "loss": 0.2293, "step": 1834 }, { "epoch": 0.17858880778588807, "grad_norm": 1.3081558633618169, "learning_rate": 9.432564986707621e-06, "loss": 0.4217, "step": 1835 }, { "epoch": 0.17868613138686132, "grad_norm": 1.3513560054673133, "learning_rate": 9.43183547363263e-06, "loss": 0.4318, "step": 1836 }, { "epoch": 0.17878345498783454, "grad_norm": 1.3315264956466353, "learning_rate": 9.431105520163426e-06, "loss": 0.3781, "step": 1837 }, { "epoch": 0.1788807785888078, "grad_norm": 1.0550787306059675, "learning_rate": 9.430375126372542e-06, "loss": 0.3104, "step": 1838 }, { "epoch": 0.17897810218978102, "grad_norm": 1.3337629142786684, "learning_rate": 9.429644292332557e-06, "loss": 0.3455, "step": 1839 }, { "epoch": 0.17907542579075425, "grad_norm": 1.6239197882024916, "learning_rate": 9.428913018116098e-06, "loss": 0.5855, "step": 1840 }, { "epoch": 0.1791727493917275, "grad_norm": 1.3780162846249417, "learning_rate": 9.428181303795828e-06, "loss": 0.3643, "step": 1841 }, { "epoch": 0.17927007299270073, "grad_norm": 1.3478310292007554, "learning_rate": 9.42744914944446e-06, "loss": 0.3962, "step": 1842 }, { "epoch": 0.17936739659367396, "grad_norm": 1.5440243743593307, "learning_rate": 9.426716555134751e-06, "loss": 0.6193, "step": 1843 }, { "epoch": 0.1794647201946472, "grad_norm": 1.4878960058265709, "learning_rate": 9.425983520939495e-06, "loss": 0.473, "step": 1844 }, { "epoch": 0.17956204379562044, "grad_norm": 1.672460221871015, "learning_rate": 9.425250046931539e-06, "loss": 0.6429, "step": 1845 }, { "epoch": 0.17965936739659366, "grad_norm": 1.6015212635221012, "learning_rate": 9.424516133183762e-06, "loss": 0.3195, "step": 1846 }, { "epoch": 0.17975669099756691, "grad_norm": 1.385761715171386, "learning_rate": 9.4237817797691e-06, "loss": 0.4054, "step": 1847 }, { "epoch": 0.17985401459854014, "grad_norm": 1.386847906411032, "learning_rate": 9.423046986760522e-06, "loss": 0.3825, "step": 1848 }, { "epoch": 0.1799513381995134, "grad_norm": 1.087510047515406, "learning_rate": 9.422311754231047e-06, "loss": 0.3213, "step": 1849 }, { "epoch": 0.18004866180048662, "grad_norm": 1.6065416301387576, "learning_rate": 9.421576082253734e-06, "loss": 0.5062, "step": 1850 }, { "epoch": 0.18014598540145985, "grad_norm": 1.34096451308299, "learning_rate": 9.42083997090169e-06, "loss": 0.4036, "step": 1851 }, { "epoch": 0.1802433090024331, "grad_norm": 1.2557739418598393, "learning_rate": 9.42010342024806e-06, "loss": 0.3595, "step": 1852 }, { "epoch": 0.18034063260340633, "grad_norm": 1.5281441778996137, "learning_rate": 9.419366430366035e-06, "loss": 0.604, "step": 1853 }, { "epoch": 0.18043795620437955, "grad_norm": 1.2665309724570952, "learning_rate": 9.418629001328852e-06, "loss": 0.4205, "step": 1854 }, { "epoch": 0.1805352798053528, "grad_norm": 1.3442942382162348, "learning_rate": 9.417891133209789e-06, "loss": 0.3457, "step": 1855 }, { "epoch": 0.18063260340632603, "grad_norm": 1.4106593198915445, "learning_rate": 9.417152826082169e-06, "loss": 0.4812, "step": 1856 }, { "epoch": 0.18072992700729926, "grad_norm": 1.4377180846268287, "learning_rate": 9.416414080019359e-06, "loss": 0.4618, "step": 1857 }, { "epoch": 0.1808272506082725, "grad_norm": 3.1493721230250182, "learning_rate": 9.415674895094765e-06, "loss": 0.4636, "step": 1858 }, { "epoch": 0.18092457420924574, "grad_norm": 1.2019926414899231, "learning_rate": 9.414935271381844e-06, "loss": 0.3081, "step": 1859 }, { "epoch": 0.181021897810219, "grad_norm": 2.6470194483303042, "learning_rate": 9.41419520895409e-06, "loss": 0.545, "step": 1860 }, { "epoch": 0.18111922141119222, "grad_norm": 1.2980614715591199, "learning_rate": 9.413454707885048e-06, "loss": 0.2964, "step": 1861 }, { "epoch": 0.18121654501216544, "grad_norm": 1.0776172492719038, "learning_rate": 9.412713768248296e-06, "loss": 0.3014, "step": 1862 }, { "epoch": 0.1813138686131387, "grad_norm": 1.6105644497131084, "learning_rate": 9.411972390117466e-06, "loss": 0.2939, "step": 1863 }, { "epoch": 0.18141119221411192, "grad_norm": 1.5656908641978677, "learning_rate": 9.411230573566227e-06, "loss": 0.5202, "step": 1864 }, { "epoch": 0.18150851581508515, "grad_norm": 1.303806212869287, "learning_rate": 9.410488318668294e-06, "loss": 0.333, "step": 1865 }, { "epoch": 0.1816058394160584, "grad_norm": 1.6655746538236336, "learning_rate": 9.409745625497427e-06, "loss": 0.432, "step": 1866 }, { "epoch": 0.18170316301703163, "grad_norm": 1.3843667729738216, "learning_rate": 9.409002494127427e-06, "loss": 0.3721, "step": 1867 }, { "epoch": 0.18180048661800485, "grad_norm": 1.119511993732411, "learning_rate": 9.408258924632139e-06, "loss": 0.3344, "step": 1868 }, { "epoch": 0.1818978102189781, "grad_norm": 1.402581324947916, "learning_rate": 9.407514917085451e-06, "loss": 0.4016, "step": 1869 }, { "epoch": 0.18199513381995133, "grad_norm": 1.424239738841203, "learning_rate": 9.406770471561298e-06, "loss": 0.4043, "step": 1870 }, { "epoch": 0.18209245742092459, "grad_norm": 1.4825401610777273, "learning_rate": 9.406025588133654e-06, "loss": 0.5446, "step": 1871 }, { "epoch": 0.1821897810218978, "grad_norm": 1.1812973154269832, "learning_rate": 9.405280266876539e-06, "loss": 0.3086, "step": 1872 }, { "epoch": 0.18228710462287104, "grad_norm": 1.458454653825207, "learning_rate": 9.404534507864015e-06, "loss": 0.426, "step": 1873 }, { "epoch": 0.1823844282238443, "grad_norm": 1.4345175445802738, "learning_rate": 9.403788311170193e-06, "loss": 0.4826, "step": 1874 }, { "epoch": 0.18248175182481752, "grad_norm": 1.636664123351898, "learning_rate": 9.403041676869217e-06, "loss": 0.5861, "step": 1875 }, { "epoch": 0.18257907542579074, "grad_norm": 1.4112207510715695, "learning_rate": 9.402294605035285e-06, "loss": 0.3575, "step": 1876 }, { "epoch": 0.182676399026764, "grad_norm": 1.5632317164864975, "learning_rate": 9.401547095742631e-06, "loss": 0.5798, "step": 1877 }, { "epoch": 0.18277372262773722, "grad_norm": 1.2700759423445944, "learning_rate": 9.400799149065538e-06, "loss": 0.3928, "step": 1878 }, { "epoch": 0.18287104622871045, "grad_norm": 1.1318646905388465, "learning_rate": 9.400050765078327e-06, "loss": 0.2783, "step": 1879 }, { "epoch": 0.1829683698296837, "grad_norm": 1.1697084872304198, "learning_rate": 9.399301943855368e-06, "loss": 0.2715, "step": 1880 }, { "epoch": 0.18306569343065693, "grad_norm": 1.4137887426273796, "learning_rate": 9.39855268547107e-06, "loss": 0.3049, "step": 1881 }, { "epoch": 0.18316301703163018, "grad_norm": 1.3869164554267486, "learning_rate": 9.397802989999888e-06, "loss": 0.3526, "step": 1882 }, { "epoch": 0.1832603406326034, "grad_norm": 1.3336674996684654, "learning_rate": 9.39705285751632e-06, "loss": 0.3914, "step": 1883 }, { "epoch": 0.18335766423357663, "grad_norm": 1.2095628873380657, "learning_rate": 9.396302288094907e-06, "loss": 0.3577, "step": 1884 }, { "epoch": 0.1834549878345499, "grad_norm": 1.4741118747641506, "learning_rate": 9.395551281810233e-06, "loss": 0.4753, "step": 1885 }, { "epoch": 0.1835523114355231, "grad_norm": 1.5440799623052803, "learning_rate": 9.394799838736928e-06, "loss": 0.5143, "step": 1886 }, { "epoch": 0.18364963503649634, "grad_norm": 1.6461828641301555, "learning_rate": 9.394047958949661e-06, "loss": 0.5046, "step": 1887 }, { "epoch": 0.1837469586374696, "grad_norm": 1.3077272649446732, "learning_rate": 9.393295642523147e-06, "loss": 0.4505, "step": 1888 }, { "epoch": 0.18384428223844282, "grad_norm": 1.3954964938282017, "learning_rate": 9.392542889532146e-06, "loss": 0.3752, "step": 1889 }, { "epoch": 0.18394160583941604, "grad_norm": 1.4332674159188397, "learning_rate": 9.391789700051457e-06, "loss": 0.4102, "step": 1890 }, { "epoch": 0.1840389294403893, "grad_norm": 1.5291760471205262, "learning_rate": 9.391036074155926e-06, "loss": 0.3892, "step": 1891 }, { "epoch": 0.18413625304136252, "grad_norm": 1.3194046059109847, "learning_rate": 9.390282011920442e-06, "loss": 0.3402, "step": 1892 }, { "epoch": 0.18423357664233578, "grad_norm": 1.1218553674196712, "learning_rate": 9.389527513419935e-06, "loss": 0.2705, "step": 1893 }, { "epoch": 0.184330900243309, "grad_norm": 1.4415924034763155, "learning_rate": 9.388772578729382e-06, "loss": 0.4153, "step": 1894 }, { "epoch": 0.18442822384428223, "grad_norm": 1.1449469634853555, "learning_rate": 9.3880172079238e-06, "loss": 0.2464, "step": 1895 }, { "epoch": 0.18452554744525548, "grad_norm": 1.3609647553229742, "learning_rate": 9.38726140107825e-06, "loss": 0.4167, "step": 1896 }, { "epoch": 0.1846228710462287, "grad_norm": 1.5005607351629322, "learning_rate": 9.38650515826784e-06, "loss": 0.5496, "step": 1897 }, { "epoch": 0.18472019464720194, "grad_norm": 1.2988771816540412, "learning_rate": 9.385748479567715e-06, "loss": 0.3746, "step": 1898 }, { "epoch": 0.1848175182481752, "grad_norm": 1.6297457427665438, "learning_rate": 9.384991365053066e-06, "loss": 0.5329, "step": 1899 }, { "epoch": 0.18491484184914841, "grad_norm": 1.4260746902123356, "learning_rate": 9.384233814799133e-06, "loss": 0.5495, "step": 1900 }, { "epoch": 0.18501216545012167, "grad_norm": 1.6131616876000299, "learning_rate": 9.38347582888119e-06, "loss": 0.4956, "step": 1901 }, { "epoch": 0.1851094890510949, "grad_norm": 1.2427047036633028, "learning_rate": 9.382717407374559e-06, "loss": 0.3527, "step": 1902 }, { "epoch": 0.18520681265206812, "grad_norm": 1.1650358905093554, "learning_rate": 9.381958550354607e-06, "loss": 0.3282, "step": 1903 }, { "epoch": 0.18530413625304137, "grad_norm": 1.2422827918011654, "learning_rate": 9.381199257896738e-06, "loss": 0.3954, "step": 1904 }, { "epoch": 0.1854014598540146, "grad_norm": 1.3772059864511268, "learning_rate": 9.38043953007641e-06, "loss": 0.2519, "step": 1905 }, { "epoch": 0.18549878345498783, "grad_norm": 1.2627132972091453, "learning_rate": 9.379679366969108e-06, "loss": 0.3748, "step": 1906 }, { "epoch": 0.18559610705596108, "grad_norm": 1.7742544300786764, "learning_rate": 9.378918768650379e-06, "loss": 0.4627, "step": 1907 }, { "epoch": 0.1856934306569343, "grad_norm": 1.3460661864821146, "learning_rate": 9.3781577351958e-06, "loss": 0.3769, "step": 1908 }, { "epoch": 0.18579075425790753, "grad_norm": 1.2948363493096455, "learning_rate": 9.377396266680993e-06, "loss": 0.255, "step": 1909 }, { "epoch": 0.18588807785888078, "grad_norm": 1.4260435934265066, "learning_rate": 9.376634363181631e-06, "loss": 0.4158, "step": 1910 }, { "epoch": 0.185985401459854, "grad_norm": 1.4136193355548345, "learning_rate": 9.375872024773423e-06, "loss": 0.3764, "step": 1911 }, { "epoch": 0.18608272506082726, "grad_norm": 1.2338333390059972, "learning_rate": 9.375109251532121e-06, "loss": 0.3785, "step": 1912 }, { "epoch": 0.1861800486618005, "grad_norm": 1.535249430616727, "learning_rate": 9.374346043533524e-06, "loss": 0.5252, "step": 1913 }, { "epoch": 0.18627737226277372, "grad_norm": 1.215284604855692, "learning_rate": 9.373582400853472e-06, "loss": 0.3295, "step": 1914 }, { "epoch": 0.18637469586374697, "grad_norm": 1.331605367733698, "learning_rate": 9.372818323567847e-06, "loss": 0.2818, "step": 1915 }, { "epoch": 0.1864720194647202, "grad_norm": 1.3700650260278666, "learning_rate": 9.37205381175258e-06, "loss": 0.5125, "step": 1916 }, { "epoch": 0.18656934306569342, "grad_norm": 1.0730618437287824, "learning_rate": 9.371288865483637e-06, "loss": 0.3608, "step": 1917 }, { "epoch": 0.18666666666666668, "grad_norm": 1.6775147335354874, "learning_rate": 9.370523484837033e-06, "loss": 0.4555, "step": 1918 }, { "epoch": 0.1867639902676399, "grad_norm": 1.531630799569193, "learning_rate": 9.369757669888822e-06, "loss": 0.502, "step": 1919 }, { "epoch": 0.18686131386861313, "grad_norm": 0.924734272033398, "learning_rate": 9.368991420715109e-06, "loss": 0.2117, "step": 1920 }, { "epoch": 0.18695863746958638, "grad_norm": 1.3568146369682141, "learning_rate": 9.36822473739203e-06, "loss": 0.4311, "step": 1921 }, { "epoch": 0.1870559610705596, "grad_norm": 1.2577909858711795, "learning_rate": 9.367457619995776e-06, "loss": 0.405, "step": 1922 }, { "epoch": 0.18715328467153286, "grad_norm": 1.5933524739274278, "learning_rate": 9.366690068602573e-06, "loss": 0.627, "step": 1923 }, { "epoch": 0.18725060827250609, "grad_norm": 1.279419778059805, "learning_rate": 9.365922083288694e-06, "loss": 0.2814, "step": 1924 }, { "epoch": 0.1873479318734793, "grad_norm": 1.6336124778487715, "learning_rate": 9.365153664130454e-06, "loss": 0.6461, "step": 1925 }, { "epoch": 0.18744525547445257, "grad_norm": 5.906434394339674, "learning_rate": 9.364384811204212e-06, "loss": 0.5628, "step": 1926 }, { "epoch": 0.1875425790754258, "grad_norm": 1.2770793302804129, "learning_rate": 9.363615524586368e-06, "loss": 0.303, "step": 1927 }, { "epoch": 0.18763990267639902, "grad_norm": 1.2695156624644028, "learning_rate": 9.362845804353367e-06, "loss": 0.3592, "step": 1928 }, { "epoch": 0.18773722627737227, "grad_norm": 1.4443375056776053, "learning_rate": 9.362075650581698e-06, "loss": 0.4701, "step": 1929 }, { "epoch": 0.1878345498783455, "grad_norm": 1.4330727776563095, "learning_rate": 9.36130506334789e-06, "loss": 0.5163, "step": 1930 }, { "epoch": 0.18793187347931872, "grad_norm": 1.326934280688427, "learning_rate": 9.360534042728517e-06, "loss": 0.289, "step": 1931 }, { "epoch": 0.18802919708029198, "grad_norm": 1.0531370847104877, "learning_rate": 9.359762588800195e-06, "loss": 0.1994, "step": 1932 }, { "epoch": 0.1881265206812652, "grad_norm": 1.4998435892573359, "learning_rate": 9.358990701639585e-06, "loss": 0.4064, "step": 1933 }, { "epoch": 0.18822384428223846, "grad_norm": 2.65155925581941, "learning_rate": 9.358218381323391e-06, "loss": 0.3513, "step": 1934 }, { "epoch": 0.18832116788321168, "grad_norm": 1.280523326704506, "learning_rate": 9.357445627928356e-06, "loss": 0.3132, "step": 1935 }, { "epoch": 0.1884184914841849, "grad_norm": 1.347047087613105, "learning_rate": 9.356672441531273e-06, "loss": 0.3334, "step": 1936 }, { "epoch": 0.18851581508515816, "grad_norm": 1.2987558904079175, "learning_rate": 9.35589882220897e-06, "loss": 0.3224, "step": 1937 }, { "epoch": 0.1886131386861314, "grad_norm": 0.9974048438134153, "learning_rate": 9.355124770038323e-06, "loss": 0.2764, "step": 1938 }, { "epoch": 0.1887104622871046, "grad_norm": 2.544180913694316, "learning_rate": 9.354350285096255e-06, "loss": 0.495, "step": 1939 }, { "epoch": 0.18880778588807787, "grad_norm": 1.613510595834776, "learning_rate": 9.353575367459718e-06, "loss": 0.5269, "step": 1940 }, { "epoch": 0.1889051094890511, "grad_norm": 1.1663508101189002, "learning_rate": 9.352800017205724e-06, "loss": 0.3936, "step": 1941 }, { "epoch": 0.18900243309002432, "grad_norm": 1.3673811421181858, "learning_rate": 9.352024234411315e-06, "loss": 0.4448, "step": 1942 }, { "epoch": 0.18909975669099757, "grad_norm": 1.1481373712644614, "learning_rate": 9.351248019153582e-06, "loss": 0.3226, "step": 1943 }, { "epoch": 0.1891970802919708, "grad_norm": 1.025014870233366, "learning_rate": 9.350471371509659e-06, "loss": 0.2095, "step": 1944 }, { "epoch": 0.18929440389294405, "grad_norm": 1.6587902238420225, "learning_rate": 9.349694291556723e-06, "loss": 0.3805, "step": 1945 }, { "epoch": 0.18939172749391728, "grad_norm": 1.568770301353131, "learning_rate": 9.348916779371993e-06, "loss": 0.3902, "step": 1946 }, { "epoch": 0.1894890510948905, "grad_norm": 1.4274566422005779, "learning_rate": 9.348138835032727e-06, "loss": 0.3644, "step": 1947 }, { "epoch": 0.18958637469586376, "grad_norm": 1.6590398647584288, "learning_rate": 9.347360458616233e-06, "loss": 0.3522, "step": 1948 }, { "epoch": 0.18968369829683698, "grad_norm": 1.5905934658559544, "learning_rate": 9.346581650199859e-06, "loss": 0.3784, "step": 1949 }, { "epoch": 0.1897810218978102, "grad_norm": 1.358850838464726, "learning_rate": 9.345802409860995e-06, "loss": 0.3407, "step": 1950 }, { "epoch": 0.18987834549878346, "grad_norm": 1.5906740312195304, "learning_rate": 9.345022737677073e-06, "loss": 0.4735, "step": 1951 }, { "epoch": 0.1899756690997567, "grad_norm": 1.419279223309371, "learning_rate": 9.344242633725573e-06, "loss": 0.4677, "step": 1952 }, { "epoch": 0.19007299270072991, "grad_norm": 2.368125402390624, "learning_rate": 9.34346209808401e-06, "loss": 0.4341, "step": 1953 }, { "epoch": 0.19017031630170317, "grad_norm": 1.6018933954570558, "learning_rate": 9.342681130829949e-06, "loss": 0.4348, "step": 1954 }, { "epoch": 0.1902676399026764, "grad_norm": 1.4757982324740848, "learning_rate": 9.341899732040996e-06, "loss": 0.393, "step": 1955 }, { "epoch": 0.19036496350364965, "grad_norm": 1.463093762624457, "learning_rate": 9.341117901794797e-06, "loss": 0.3787, "step": 1956 }, { "epoch": 0.19046228710462287, "grad_norm": 1.5507561900230402, "learning_rate": 9.340335640169045e-06, "loss": 0.4715, "step": 1957 }, { "epoch": 0.1905596107055961, "grad_norm": 1.4207468273121375, "learning_rate": 9.339552947241471e-06, "loss": 0.3938, "step": 1958 }, { "epoch": 0.19065693430656935, "grad_norm": 1.407596113402629, "learning_rate": 9.338769823089853e-06, "loss": 0.4965, "step": 1959 }, { "epoch": 0.19075425790754258, "grad_norm": 1.5505869092648736, "learning_rate": 9.337986267792014e-06, "loss": 0.3699, "step": 1960 }, { "epoch": 0.1908515815085158, "grad_norm": 1.4558635051434323, "learning_rate": 9.33720228142581e-06, "loss": 0.3436, "step": 1961 }, { "epoch": 0.19094890510948906, "grad_norm": 1.4210127007858437, "learning_rate": 9.336417864069152e-06, "loss": 0.3959, "step": 1962 }, { "epoch": 0.19104622871046228, "grad_norm": 1.5797691467496429, "learning_rate": 9.335633015799983e-06, "loss": 0.5438, "step": 1963 }, { "epoch": 0.1911435523114355, "grad_norm": 1.200940613853037, "learning_rate": 9.334847736696297e-06, "loss": 0.3037, "step": 1964 }, { "epoch": 0.19124087591240876, "grad_norm": 1.6206966051553, "learning_rate": 9.334062026836128e-06, "loss": 0.6412, "step": 1965 }, { "epoch": 0.191338199513382, "grad_norm": 1.3678147539203456, "learning_rate": 9.33327588629755e-06, "loss": 0.328, "step": 1966 }, { "epoch": 0.19143552311435524, "grad_norm": 1.425436568728509, "learning_rate": 9.332489315158685e-06, "loss": 0.42, "step": 1967 }, { "epoch": 0.19153284671532847, "grad_norm": 1.4740185495034979, "learning_rate": 9.331702313497693e-06, "loss": 0.3563, "step": 1968 }, { "epoch": 0.1916301703163017, "grad_norm": 1.4865130636524604, "learning_rate": 9.33091488139278e-06, "loss": 0.3452, "step": 1969 }, { "epoch": 0.19172749391727495, "grad_norm": 1.595704917953399, "learning_rate": 9.330127018922195e-06, "loss": 0.6593, "step": 1970 }, { "epoch": 0.19182481751824818, "grad_norm": 1.4305855687191487, "learning_rate": 9.329338726164225e-06, "loss": 0.4935, "step": 1971 }, { "epoch": 0.1919221411192214, "grad_norm": 1.4810316480182457, "learning_rate": 9.328550003197203e-06, "loss": 0.4303, "step": 1972 }, { "epoch": 0.19201946472019465, "grad_norm": 1.1937939840472271, "learning_rate": 9.32776085009951e-06, "loss": 0.3178, "step": 1973 }, { "epoch": 0.19211678832116788, "grad_norm": 1.3344201288029265, "learning_rate": 9.326971266949558e-06, "loss": 0.3469, "step": 1974 }, { "epoch": 0.1922141119221411, "grad_norm": 1.5818137690503504, "learning_rate": 9.326181253825813e-06, "loss": 0.505, "step": 1975 }, { "epoch": 0.19231143552311436, "grad_norm": 1.263126969220317, "learning_rate": 9.325390810806778e-06, "loss": 0.3967, "step": 1976 }, { "epoch": 0.19240875912408759, "grad_norm": 1.6967730581105949, "learning_rate": 9.324599937971e-06, "loss": 0.7353, "step": 1977 }, { "epoch": 0.19250608272506084, "grad_norm": 1.4550804189369502, "learning_rate": 9.323808635397067e-06, "loss": 0.3326, "step": 1978 }, { "epoch": 0.19260340632603407, "grad_norm": 1.594493767215082, "learning_rate": 9.323016903163612e-06, "loss": 0.4547, "step": 1979 }, { "epoch": 0.1927007299270073, "grad_norm": 1.4855552398261571, "learning_rate": 9.322224741349313e-06, "loss": 0.5095, "step": 1980 }, { "epoch": 0.19279805352798055, "grad_norm": 1.3769945503658922, "learning_rate": 9.321432150032884e-06, "loss": 0.3853, "step": 1981 }, { "epoch": 0.19289537712895377, "grad_norm": 1.3138128708042736, "learning_rate": 9.320639129293083e-06, "loss": 0.4129, "step": 1982 }, { "epoch": 0.192992700729927, "grad_norm": 1.4617598559962484, "learning_rate": 9.319845679208719e-06, "loss": 0.449, "step": 1983 }, { "epoch": 0.19309002433090025, "grad_norm": 1.6332060417216765, "learning_rate": 9.319051799858633e-06, "loss": 0.594, "step": 1984 }, { "epoch": 0.19318734793187348, "grad_norm": 1.5432637765560855, "learning_rate": 9.318257491321714e-06, "loss": 0.3465, "step": 1985 }, { "epoch": 0.1932846715328467, "grad_norm": 1.4536395238750577, "learning_rate": 9.317462753676895e-06, "loss": 0.4212, "step": 1986 }, { "epoch": 0.19338199513381996, "grad_norm": 1.3985266204226148, "learning_rate": 9.31666758700315e-06, "loss": 0.5313, "step": 1987 }, { "epoch": 0.19347931873479318, "grad_norm": 1.4329939166816383, "learning_rate": 9.315871991379493e-06, "loss": 0.3958, "step": 1988 }, { "epoch": 0.19357664233576644, "grad_norm": 1.3666417803863316, "learning_rate": 9.315075966884984e-06, "loss": 0.462, "step": 1989 }, { "epoch": 0.19367396593673966, "grad_norm": 1.6059064802064114, "learning_rate": 9.314279513598721e-06, "loss": 0.5734, "step": 1990 }, { "epoch": 0.1937712895377129, "grad_norm": 1.521730062801285, "learning_rate": 9.313482631599854e-06, "loss": 0.3479, "step": 1991 }, { "epoch": 0.19386861313868614, "grad_norm": 1.5212897395363751, "learning_rate": 9.312685320967566e-06, "loss": 0.4328, "step": 1992 }, { "epoch": 0.19396593673965937, "grad_norm": 1.669365255826549, "learning_rate": 9.311887581781086e-06, "loss": 0.6153, "step": 1993 }, { "epoch": 0.1940632603406326, "grad_norm": 1.1692329123053622, "learning_rate": 9.311089414119688e-06, "loss": 0.3149, "step": 1994 }, { "epoch": 0.19416058394160585, "grad_norm": 1.4724909439197027, "learning_rate": 9.310290818062683e-06, "loss": 0.478, "step": 1995 }, { "epoch": 0.19425790754257907, "grad_norm": 1.667688851021317, "learning_rate": 9.309491793689431e-06, "loss": 0.6192, "step": 1996 }, { "epoch": 0.1943552311435523, "grad_norm": 1.2423474670669281, "learning_rate": 9.30869234107933e-06, "loss": 0.4242, "step": 1997 }, { "epoch": 0.19445255474452555, "grad_norm": 1.4117486896728357, "learning_rate": 9.307892460311825e-06, "loss": 0.4417, "step": 1998 }, { "epoch": 0.19454987834549878, "grad_norm": 1.6605518542896853, "learning_rate": 9.307092151466397e-06, "loss": 0.5289, "step": 1999 }, { "epoch": 0.19464720194647203, "grad_norm": 1.661933360658536, "learning_rate": 9.306291414622575e-06, "loss": 0.3357, "step": 2000 }, { "epoch": 0.19474452554744526, "grad_norm": 1.4409618985011814, "learning_rate": 9.305490249859927e-06, "loss": 0.4563, "step": 2001 }, { "epoch": 0.19484184914841848, "grad_norm": 1.9082899591217046, "learning_rate": 9.304688657258068e-06, "loss": 0.3445, "step": 2002 }, { "epoch": 0.19493917274939174, "grad_norm": 1.2157434891172034, "learning_rate": 9.303886636896649e-06, "loss": 0.3719, "step": 2003 }, { "epoch": 0.19503649635036496, "grad_norm": 1.57236888854409, "learning_rate": 9.303084188855371e-06, "loss": 0.4399, "step": 2004 }, { "epoch": 0.1951338199513382, "grad_norm": 1.4041570559360463, "learning_rate": 9.302281313213973e-06, "loss": 0.4442, "step": 2005 }, { "epoch": 0.19523114355231144, "grad_norm": 1.595081147428658, "learning_rate": 9.301478010052237e-06, "loss": 0.4225, "step": 2006 }, { "epoch": 0.19532846715328467, "grad_norm": 1.562924823229517, "learning_rate": 9.300674279449986e-06, "loss": 0.3739, "step": 2007 }, { "epoch": 0.1954257907542579, "grad_norm": 1.6925679153497177, "learning_rate": 9.299870121487088e-06, "loss": 0.4465, "step": 2008 }, { "epoch": 0.19552311435523115, "grad_norm": 1.4955175500348226, "learning_rate": 9.299065536243453e-06, "loss": 0.5055, "step": 2009 }, { "epoch": 0.19562043795620437, "grad_norm": 1.5602814755448668, "learning_rate": 9.298260523799035e-06, "loss": 0.4214, "step": 2010 }, { "epoch": 0.19571776155717763, "grad_norm": 1.4678189187481074, "learning_rate": 9.297455084233826e-06, "loss": 0.4221, "step": 2011 }, { "epoch": 0.19581508515815085, "grad_norm": 1.1014848505883976, "learning_rate": 9.296649217627863e-06, "loss": 0.2531, "step": 2012 }, { "epoch": 0.19591240875912408, "grad_norm": 1.553421501855423, "learning_rate": 9.295842924061227e-06, "loss": 0.5409, "step": 2013 }, { "epoch": 0.19600973236009733, "grad_norm": 1.598118050761176, "learning_rate": 9.295036203614039e-06, "loss": 0.4084, "step": 2014 }, { "epoch": 0.19610705596107056, "grad_norm": 1.6278848716274248, "learning_rate": 9.294229056366464e-06, "loss": 0.5842, "step": 2015 }, { "epoch": 0.19620437956204378, "grad_norm": 1.243515264701947, "learning_rate": 9.293421482398708e-06, "loss": 0.3504, "step": 2016 }, { "epoch": 0.19630170316301704, "grad_norm": 1.4687425329140307, "learning_rate": 9.29261348179102e-06, "loss": 0.2732, "step": 2017 }, { "epoch": 0.19639902676399026, "grad_norm": 1.8000259635960119, "learning_rate": 9.291805054623691e-06, "loss": 0.7865, "step": 2018 }, { "epoch": 0.1964963503649635, "grad_norm": 1.5721673591186547, "learning_rate": 9.290996200977058e-06, "loss": 0.5686, "step": 2019 }, { "epoch": 0.19659367396593674, "grad_norm": 1.4634877349944297, "learning_rate": 9.290186920931493e-06, "loss": 0.4884, "step": 2020 }, { "epoch": 0.19669099756690997, "grad_norm": 1.8795352763168436, "learning_rate": 9.289377214567418e-06, "loss": 0.279, "step": 2021 }, { "epoch": 0.19678832116788322, "grad_norm": 1.2525962570268505, "learning_rate": 9.288567081965292e-06, "loss": 0.3003, "step": 2022 }, { "epoch": 0.19688564476885645, "grad_norm": 1.4414518188882164, "learning_rate": 9.28775652320562e-06, "loss": 0.2883, "step": 2023 }, { "epoch": 0.19698296836982968, "grad_norm": 1.1469869990322892, "learning_rate": 9.286945538368946e-06, "loss": 0.301, "step": 2024 }, { "epoch": 0.19708029197080293, "grad_norm": 1.4386800814955665, "learning_rate": 9.286134127535859e-06, "loss": 0.417, "step": 2025 }, { "epoch": 0.19717761557177615, "grad_norm": 1.4334168701816348, "learning_rate": 9.28532229078699e-06, "loss": 0.4694, "step": 2026 }, { "epoch": 0.19727493917274938, "grad_norm": 1.2925159318336792, "learning_rate": 9.28451002820301e-06, "loss": 0.4438, "step": 2027 }, { "epoch": 0.19737226277372263, "grad_norm": 1.1608723700468837, "learning_rate": 9.283697339864635e-06, "loss": 0.3899, "step": 2028 }, { "epoch": 0.19746958637469586, "grad_norm": 1.0831308664734243, "learning_rate": 9.282884225852625e-06, "loss": 0.3594, "step": 2029 }, { "epoch": 0.19756690997566909, "grad_norm": 1.3854325468066278, "learning_rate": 9.282070686247773e-06, "loss": 0.5111, "step": 2030 }, { "epoch": 0.19766423357664234, "grad_norm": 1.2843702051671877, "learning_rate": 9.281256721130927e-06, "loss": 0.3298, "step": 2031 }, { "epoch": 0.19776155717761557, "grad_norm": 1.4725158786403292, "learning_rate": 9.280442330582968e-06, "loss": 0.4776, "step": 2032 }, { "epoch": 0.19785888077858882, "grad_norm": 1.2748346913452204, "learning_rate": 9.279627514684826e-06, "loss": 0.4438, "step": 2033 }, { "epoch": 0.19795620437956205, "grad_norm": 1.406716290626126, "learning_rate": 9.278812273517465e-06, "loss": 0.2814, "step": 2034 }, { "epoch": 0.19805352798053527, "grad_norm": 1.3303438388967537, "learning_rate": 9.2779966071619e-06, "loss": 0.4314, "step": 2035 }, { "epoch": 0.19815085158150852, "grad_norm": 1.4134730169408085, "learning_rate": 9.277180515699183e-06, "loss": 0.2764, "step": 2036 }, { "epoch": 0.19824817518248175, "grad_norm": 1.3255645305073551, "learning_rate": 9.276363999210407e-06, "loss": 0.4347, "step": 2037 }, { "epoch": 0.19834549878345498, "grad_norm": 1.4369644328356708, "learning_rate": 9.275547057776713e-06, "loss": 0.3551, "step": 2038 }, { "epoch": 0.19844282238442823, "grad_norm": 1.748281657046459, "learning_rate": 9.27472969147928e-06, "loss": 0.4372, "step": 2039 }, { "epoch": 0.19854014598540146, "grad_norm": 1.2795189118800725, "learning_rate": 9.273911900399331e-06, "loss": 0.4431, "step": 2040 }, { "epoch": 0.1986374695863747, "grad_norm": 1.165526474375854, "learning_rate": 9.273093684618129e-06, "loss": 0.2936, "step": 2041 }, { "epoch": 0.19873479318734794, "grad_norm": 1.6068781771010836, "learning_rate": 9.272275044216981e-06, "loss": 0.5125, "step": 2042 }, { "epoch": 0.19883211678832116, "grad_norm": 1.4210491087425543, "learning_rate": 9.271455979277234e-06, "loss": 0.4142, "step": 2043 }, { "epoch": 0.19892944038929442, "grad_norm": 1.6609287753373938, "learning_rate": 9.270636489880283e-06, "loss": 0.6728, "step": 2044 }, { "epoch": 0.19902676399026764, "grad_norm": 1.3902108507987736, "learning_rate": 9.26981657610756e-06, "loss": 0.3492, "step": 2045 }, { "epoch": 0.19912408759124087, "grad_norm": 1.6316422644879316, "learning_rate": 9.268996238040537e-06, "loss": 0.5029, "step": 2046 }, { "epoch": 0.19922141119221412, "grad_norm": 1.2841836791466006, "learning_rate": 9.268175475760734e-06, "loss": 0.3849, "step": 2047 }, { "epoch": 0.19931873479318735, "grad_norm": 1.319713524379575, "learning_rate": 9.267354289349712e-06, "loss": 0.4439, "step": 2048 }, { "epoch": 0.19941605839416057, "grad_norm": 1.3549935774985267, "learning_rate": 9.266532678889071e-06, "loss": 0.4382, "step": 2049 }, { "epoch": 0.19951338199513383, "grad_norm": 1.8518976479625036, "learning_rate": 9.265710644460455e-06, "loss": 0.8216, "step": 2050 }, { "epoch": 0.19961070559610705, "grad_norm": 1.9509154982810264, "learning_rate": 9.26488818614555e-06, "loss": 0.4607, "step": 2051 }, { "epoch": 0.1997080291970803, "grad_norm": 1.2954164138913125, "learning_rate": 9.264065304026087e-06, "loss": 0.4257, "step": 2052 }, { "epoch": 0.19980535279805353, "grad_norm": 1.925685176039115, "learning_rate": 9.26324199818383e-06, "loss": 0.6025, "step": 2053 }, { "epoch": 0.19990267639902676, "grad_norm": 1.533947029174009, "learning_rate": 9.262418268700596e-06, "loss": 0.5443, "step": 2054 }, { "epoch": 0.2, "grad_norm": 1.4995274594175463, "learning_rate": 9.26159411565824e-06, "loss": 0.5023, "step": 2055 }, { "epoch": 0.20009732360097324, "grad_norm": 1.4350182215101954, "learning_rate": 9.26076953913866e-06, "loss": 0.3726, "step": 2056 }, { "epoch": 0.20019464720194646, "grad_norm": 1.3019491914952392, "learning_rate": 9.259944539223788e-06, "loss": 0.4765, "step": 2057 }, { "epoch": 0.20029197080291972, "grad_norm": 1.3884509805578256, "learning_rate": 9.25911911599561e-06, "loss": 0.338, "step": 2058 }, { "epoch": 0.20038929440389294, "grad_norm": 1.488048064619486, "learning_rate": 9.258293269536146e-06, "loss": 0.5872, "step": 2059 }, { "epoch": 0.20048661800486617, "grad_norm": 1.1548733119099643, "learning_rate": 9.257466999927464e-06, "loss": 0.3242, "step": 2060 }, { "epoch": 0.20058394160583942, "grad_norm": 1.048222542797774, "learning_rate": 9.25664030725167e-06, "loss": 0.3253, "step": 2061 }, { "epoch": 0.20068126520681265, "grad_norm": 1.211590892113714, "learning_rate": 9.255813191590912e-06, "loss": 0.3414, "step": 2062 }, { "epoch": 0.2007785888077859, "grad_norm": 1.3770802107798175, "learning_rate": 9.254985653027382e-06, "loss": 0.4031, "step": 2063 }, { "epoch": 0.20087591240875913, "grad_norm": 1.4503315973945832, "learning_rate": 9.25415769164331e-06, "loss": 0.4799, "step": 2064 }, { "epoch": 0.20097323600973235, "grad_norm": 1.3613570222565128, "learning_rate": 9.253329307520976e-06, "loss": 0.3932, "step": 2065 }, { "epoch": 0.2010705596107056, "grad_norm": 1.436956883536887, "learning_rate": 9.252500500742692e-06, "loss": 0.51, "step": 2066 }, { "epoch": 0.20116788321167883, "grad_norm": 1.3042874208229347, "learning_rate": 9.25167127139082e-06, "loss": 0.3702, "step": 2067 }, { "epoch": 0.20126520681265206, "grad_norm": 1.4601934649693376, "learning_rate": 9.250841619547762e-06, "loss": 0.3927, "step": 2068 }, { "epoch": 0.2013625304136253, "grad_norm": 1.4877017036692342, "learning_rate": 9.250011545295959e-06, "loss": 0.5463, "step": 2069 }, { "epoch": 0.20145985401459854, "grad_norm": 1.3385891837902342, "learning_rate": 9.249181048717895e-06, "loss": 0.3052, "step": 2070 }, { "epoch": 0.20155717761557176, "grad_norm": 1.111892744483471, "learning_rate": 9.2483501298961e-06, "loss": 0.2342, "step": 2071 }, { "epoch": 0.20165450121654502, "grad_norm": 1.4336755713622584, "learning_rate": 9.247518788913141e-06, "loss": 0.4416, "step": 2072 }, { "epoch": 0.20175182481751824, "grad_norm": 1.4682039909825075, "learning_rate": 9.246687025851629e-06, "loss": 0.3044, "step": 2073 }, { "epoch": 0.2018491484184915, "grad_norm": 1.1356161216510552, "learning_rate": 9.245854840794217e-06, "loss": 0.2913, "step": 2074 }, { "epoch": 0.20194647201946472, "grad_norm": 1.2497989015941582, "learning_rate": 9.2450222338236e-06, "loss": 0.356, "step": 2075 }, { "epoch": 0.20204379562043795, "grad_norm": 1.4662802201560914, "learning_rate": 9.244189205022514e-06, "loss": 0.5234, "step": 2076 }, { "epoch": 0.2021411192214112, "grad_norm": 1.1493994388606168, "learning_rate": 9.243355754473738e-06, "loss": 0.3862, "step": 2077 }, { "epoch": 0.20223844282238443, "grad_norm": 1.1352456631925198, "learning_rate": 9.242521882260093e-06, "loss": 0.3693, "step": 2078 }, { "epoch": 0.20233576642335765, "grad_norm": 1.4112847797443164, "learning_rate": 9.24168758846444e-06, "loss": 0.4667, "step": 2079 }, { "epoch": 0.2024330900243309, "grad_norm": 1.9587086933310962, "learning_rate": 9.240852873169686e-06, "loss": 0.5446, "step": 2080 }, { "epoch": 0.20253041362530413, "grad_norm": 1.4532595336328356, "learning_rate": 9.240017736458772e-06, "loss": 0.56, "step": 2081 }, { "epoch": 0.20262773722627736, "grad_norm": 1.1373158358211433, "learning_rate": 9.239182178414694e-06, "loss": 0.3998, "step": 2082 }, { "epoch": 0.2027250608272506, "grad_norm": 1.4892855081953407, "learning_rate": 9.238346199120473e-06, "loss": 0.5564, "step": 2083 }, { "epoch": 0.20282238442822384, "grad_norm": 1.4122351541601532, "learning_rate": 9.237509798659188e-06, "loss": 0.4407, "step": 2084 }, { "epoch": 0.2029197080291971, "grad_norm": 1.266747153803517, "learning_rate": 9.236672977113948e-06, "loss": 0.3898, "step": 2085 }, { "epoch": 0.20301703163017032, "grad_norm": 1.3972737248894866, "learning_rate": 9.23583573456791e-06, "loss": 0.4855, "step": 2086 }, { "epoch": 0.20311435523114355, "grad_norm": 1.6424190339871019, "learning_rate": 9.234998071104272e-06, "loss": 0.732, "step": 2087 }, { "epoch": 0.2032116788321168, "grad_norm": 1.4973722328869334, "learning_rate": 9.234159986806275e-06, "loss": 0.4796, "step": 2088 }, { "epoch": 0.20330900243309002, "grad_norm": 1.5629802728678386, "learning_rate": 9.233321481757196e-06, "loss": 0.4762, "step": 2089 }, { "epoch": 0.20340632603406325, "grad_norm": 1.5273353205689704, "learning_rate": 9.23248255604036e-06, "loss": 0.6446, "step": 2090 }, { "epoch": 0.2035036496350365, "grad_norm": 1.3835329237350877, "learning_rate": 9.231643209739128e-06, "loss": 0.5297, "step": 2091 }, { "epoch": 0.20360097323600973, "grad_norm": 1.2187102873763251, "learning_rate": 9.230803442936911e-06, "loss": 0.3727, "step": 2092 }, { "epoch": 0.20369829683698296, "grad_norm": 1.325749011032711, "learning_rate": 9.229963255717156e-06, "loss": 0.5476, "step": 2093 }, { "epoch": 0.2037956204379562, "grad_norm": 1.1246093495598513, "learning_rate": 9.229122648163351e-06, "loss": 0.3309, "step": 2094 }, { "epoch": 0.20389294403892944, "grad_norm": 1.3415111254139396, "learning_rate": 9.22828162035903e-06, "loss": 0.4226, "step": 2095 }, { "epoch": 0.2039902676399027, "grad_norm": 1.2431047519820402, "learning_rate": 9.227440172387766e-06, "loss": 0.2364, "step": 2096 }, { "epoch": 0.20408759124087592, "grad_norm": 1.59824202042343, "learning_rate": 9.226598304333175e-06, "loss": 0.5713, "step": 2097 }, { "epoch": 0.20418491484184914, "grad_norm": 1.3718145057357327, "learning_rate": 9.22575601627891e-06, "loss": 0.4366, "step": 2098 }, { "epoch": 0.2042822384428224, "grad_norm": 1.8310954422547832, "learning_rate": 9.224913308308672e-06, "loss": 0.4098, "step": 2099 }, { "epoch": 0.20437956204379562, "grad_norm": 1.3433956299970118, "learning_rate": 9.224070180506202e-06, "loss": 0.2959, "step": 2100 }, { "epoch": 0.20447688564476885, "grad_norm": 1.0277615122037833, "learning_rate": 9.223226632955283e-06, "loss": 0.265, "step": 2101 }, { "epoch": 0.2045742092457421, "grad_norm": 1.2285380399323877, "learning_rate": 9.222382665739737e-06, "loss": 0.3844, "step": 2102 }, { "epoch": 0.20467153284671533, "grad_norm": 1.1151094116106592, "learning_rate": 9.221538278943432e-06, "loss": 0.2461, "step": 2103 }, { "epoch": 0.20476885644768855, "grad_norm": 1.5239102143699876, "learning_rate": 9.22069347265027e-06, "loss": 0.4239, "step": 2104 }, { "epoch": 0.2048661800486618, "grad_norm": 1.6502658051911525, "learning_rate": 9.219848246944206e-06, "loss": 0.6723, "step": 2105 }, { "epoch": 0.20496350364963503, "grad_norm": 1.638974040274465, "learning_rate": 9.219002601909229e-06, "loss": 0.5068, "step": 2106 }, { "epoch": 0.20506082725060829, "grad_norm": 1.4649352184984061, "learning_rate": 9.218156537629368e-06, "loss": 0.4698, "step": 2107 }, { "epoch": 0.2051581508515815, "grad_norm": 1.5070786345583258, "learning_rate": 9.217310054188699e-06, "loss": 0.4654, "step": 2108 }, { "epoch": 0.20525547445255474, "grad_norm": 1.2480947756940115, "learning_rate": 9.216463151671338e-06, "loss": 0.3614, "step": 2109 }, { "epoch": 0.205352798053528, "grad_norm": 1.6536121595263205, "learning_rate": 9.215615830161443e-06, "loss": 0.5872, "step": 2110 }, { "epoch": 0.20545012165450122, "grad_norm": 1.5559546132859907, "learning_rate": 9.214768089743211e-06, "loss": 0.5098, "step": 2111 }, { "epoch": 0.20554744525547444, "grad_norm": 1.5691593927804695, "learning_rate": 9.213919930500884e-06, "loss": 0.3845, "step": 2112 }, { "epoch": 0.2056447688564477, "grad_norm": 1.4385010923740136, "learning_rate": 9.213071352518744e-06, "loss": 0.4035, "step": 2113 }, { "epoch": 0.20574209245742092, "grad_norm": 1.2415148755341134, "learning_rate": 9.212222355881111e-06, "loss": 0.2503, "step": 2114 }, { "epoch": 0.20583941605839415, "grad_norm": 1.597224767194554, "learning_rate": 9.211372940672356e-06, "loss": 0.3831, "step": 2115 }, { "epoch": 0.2059367396593674, "grad_norm": 1.3936071245663937, "learning_rate": 9.210523106976884e-06, "loss": 0.3664, "step": 2116 }, { "epoch": 0.20603406326034063, "grad_norm": 1.4335641468120297, "learning_rate": 9.209672854879142e-06, "loss": 0.3182, "step": 2117 }, { "epoch": 0.20613138686131388, "grad_norm": 1.2544256067640176, "learning_rate": 9.20882218446362e-06, "loss": 0.2678, "step": 2118 }, { "epoch": 0.2062287104622871, "grad_norm": 1.4867246001264303, "learning_rate": 9.207971095814852e-06, "loss": 0.4934, "step": 2119 }, { "epoch": 0.20632603406326033, "grad_norm": 1.5387304887069146, "learning_rate": 9.207119589017408e-06, "loss": 0.4552, "step": 2120 }, { "epoch": 0.2064233576642336, "grad_norm": 1.507156387441411, "learning_rate": 9.206267664155906e-06, "loss": 0.4209, "step": 2121 }, { "epoch": 0.2065206812652068, "grad_norm": 1.3407732350308024, "learning_rate": 9.205415321315e-06, "loss": 0.4256, "step": 2122 }, { "epoch": 0.20661800486618004, "grad_norm": 1.6313949345186305, "learning_rate": 9.20456256057939e-06, "loss": 0.4727, "step": 2123 }, { "epoch": 0.2067153284671533, "grad_norm": 1.695026004332969, "learning_rate": 9.203709382033814e-06, "loss": 0.6547, "step": 2124 }, { "epoch": 0.20681265206812652, "grad_norm": 1.5677721722384952, "learning_rate": 9.202855785763053e-06, "loss": 0.4469, "step": 2125 }, { "epoch": 0.20690997566909974, "grad_norm": 1.4276579746412523, "learning_rate": 9.202001771851928e-06, "loss": 0.4511, "step": 2126 }, { "epoch": 0.207007299270073, "grad_norm": 1.365652083209099, "learning_rate": 9.201147340385304e-06, "loss": 0.4435, "step": 2127 }, { "epoch": 0.20710462287104622, "grad_norm": 1.4014399599326692, "learning_rate": 9.200292491448086e-06, "loss": 0.4017, "step": 2128 }, { "epoch": 0.20720194647201948, "grad_norm": 1.4131798281318602, "learning_rate": 9.199437225125223e-06, "loss": 0.2781, "step": 2129 }, { "epoch": 0.2072992700729927, "grad_norm": 1.3392698432345278, "learning_rate": 9.198581541501702e-06, "loss": 0.3576, "step": 2130 }, { "epoch": 0.20739659367396593, "grad_norm": 1.2859171090531423, "learning_rate": 9.197725440662552e-06, "loss": 0.4505, "step": 2131 }, { "epoch": 0.20749391727493918, "grad_norm": 1.3075221898254676, "learning_rate": 9.196868922692845e-06, "loss": 0.42, "step": 2132 }, { "epoch": 0.2075912408759124, "grad_norm": 1.3120969425940014, "learning_rate": 9.196011987677693e-06, "loss": 0.3918, "step": 2133 }, { "epoch": 0.20768856447688563, "grad_norm": 1.2917866907447901, "learning_rate": 9.19515463570225e-06, "loss": 0.4515, "step": 2134 }, { "epoch": 0.2077858880778589, "grad_norm": 1.4964227937052923, "learning_rate": 9.194296866851714e-06, "loss": 0.4007, "step": 2135 }, { "epoch": 0.2078832116788321, "grad_norm": 1.4096694486456338, "learning_rate": 9.19343868121132e-06, "loss": 0.5684, "step": 2136 }, { "epoch": 0.20798053527980534, "grad_norm": 1.1303877036272907, "learning_rate": 9.192580078866346e-06, "loss": 0.2661, "step": 2137 }, { "epoch": 0.2080778588807786, "grad_norm": 1.4056619474271335, "learning_rate": 9.191721059902112e-06, "loss": 0.4174, "step": 2138 }, { "epoch": 0.20817518248175182, "grad_norm": 1.7142064467904727, "learning_rate": 9.190861624403981e-06, "loss": 0.4453, "step": 2139 }, { "epoch": 0.20827250608272507, "grad_norm": 1.3293557691236777, "learning_rate": 9.190001772457356e-06, "loss": 0.4541, "step": 2140 }, { "epoch": 0.2083698296836983, "grad_norm": 1.6131133576379075, "learning_rate": 9.189141504147676e-06, "loss": 0.3751, "step": 2141 }, { "epoch": 0.20846715328467152, "grad_norm": 1.509737357483189, "learning_rate": 9.188280819560431e-06, "loss": 0.4757, "step": 2142 }, { "epoch": 0.20856447688564478, "grad_norm": 1.479538114231473, "learning_rate": 9.187419718781149e-06, "loss": 0.3243, "step": 2143 }, { "epoch": 0.208661800486618, "grad_norm": 1.4973982658919327, "learning_rate": 9.186558201895395e-06, "loss": 0.3732, "step": 2144 }, { "epoch": 0.20875912408759123, "grad_norm": 1.5121453838943797, "learning_rate": 9.185696268988777e-06, "loss": 0.5435, "step": 2145 }, { "epoch": 0.20885644768856448, "grad_norm": 1.7349033410138828, "learning_rate": 9.18483392014695e-06, "loss": 0.6415, "step": 2146 }, { "epoch": 0.2089537712895377, "grad_norm": 1.4812330220855032, "learning_rate": 9.183971155455602e-06, "loss": 0.4961, "step": 2147 }, { "epoch": 0.20905109489051094, "grad_norm": 1.5121767597167877, "learning_rate": 9.183107975000472e-06, "loss": 0.5298, "step": 2148 }, { "epoch": 0.2091484184914842, "grad_norm": 1.5424817825799644, "learning_rate": 9.18224437886733e-06, "loss": 0.4577, "step": 2149 }, { "epoch": 0.20924574209245742, "grad_norm": 1.2733853569354763, "learning_rate": 9.181380367141991e-06, "loss": 0.3306, "step": 2150 }, { "epoch": 0.20934306569343067, "grad_norm": 1.1384650904715041, "learning_rate": 9.180515939910317e-06, "loss": 0.3831, "step": 2151 }, { "epoch": 0.2094403892944039, "grad_norm": 1.3798308474076018, "learning_rate": 9.179651097258204e-06, "loss": 0.4629, "step": 2152 }, { "epoch": 0.20953771289537712, "grad_norm": 1.4059733648531154, "learning_rate": 9.178785839271593e-06, "loss": 0.4526, "step": 2153 }, { "epoch": 0.20963503649635037, "grad_norm": 1.581039004516103, "learning_rate": 9.177920166036464e-06, "loss": 0.5397, "step": 2154 }, { "epoch": 0.2097323600973236, "grad_norm": 1.4851118969101265, "learning_rate": 9.17705407763884e-06, "loss": 0.5052, "step": 2155 }, { "epoch": 0.20982968369829683, "grad_norm": 1.3633687775503893, "learning_rate": 9.176187574164785e-06, "loss": 0.4427, "step": 2156 }, { "epoch": 0.20992700729927008, "grad_norm": 1.360319094739405, "learning_rate": 9.175320655700407e-06, "loss": 0.3649, "step": 2157 }, { "epoch": 0.2100243309002433, "grad_norm": 1.3829673206277566, "learning_rate": 9.174453322331844e-06, "loss": 0.3536, "step": 2158 }, { "epoch": 0.21012165450121653, "grad_norm": 1.5804059757696094, "learning_rate": 9.173585574145292e-06, "loss": 0.5937, "step": 2159 }, { "epoch": 0.21021897810218979, "grad_norm": 1.4991084469228289, "learning_rate": 9.172717411226975e-06, "loss": 0.3523, "step": 2160 }, { "epoch": 0.210316301703163, "grad_norm": 1.4762289487935065, "learning_rate": 9.171848833663165e-06, "loss": 0.4991, "step": 2161 }, { "epoch": 0.21041362530413626, "grad_norm": 1.4858484283610454, "learning_rate": 9.17097984154017e-06, "loss": 0.5153, "step": 2162 }, { "epoch": 0.2105109489051095, "grad_norm": 1.2647097068290445, "learning_rate": 9.170110434944345e-06, "loss": 0.3193, "step": 2163 }, { "epoch": 0.21060827250608272, "grad_norm": 1.6889738075479466, "learning_rate": 9.169240613962086e-06, "loss": 0.4755, "step": 2164 }, { "epoch": 0.21070559610705597, "grad_norm": 1.6464662019172414, "learning_rate": 9.168370378679821e-06, "loss": 0.5303, "step": 2165 }, { "epoch": 0.2108029197080292, "grad_norm": 1.287927301108519, "learning_rate": 9.16749972918403e-06, "loss": 0.3231, "step": 2166 }, { "epoch": 0.21090024330900242, "grad_norm": 1.378935902738664, "learning_rate": 9.16662866556123e-06, "loss": 0.4654, "step": 2167 }, { "epoch": 0.21099756690997568, "grad_norm": 1.415652566603492, "learning_rate": 9.16575718789798e-06, "loss": 0.42, "step": 2168 }, { "epoch": 0.2110948905109489, "grad_norm": 1.189498123796033, "learning_rate": 9.164885296280875e-06, "loss": 0.3529, "step": 2169 }, { "epoch": 0.21119221411192213, "grad_norm": 1.5371351227791108, "learning_rate": 9.16401299079656e-06, "loss": 0.4679, "step": 2170 }, { "epoch": 0.21128953771289538, "grad_norm": 1.2493790037654902, "learning_rate": 9.163140271531714e-06, "loss": 0.3793, "step": 2171 }, { "epoch": 0.2113868613138686, "grad_norm": 1.3836947713855836, "learning_rate": 9.16226713857306e-06, "loss": 0.436, "step": 2172 }, { "epoch": 0.21148418491484186, "grad_norm": 1.583280621035993, "learning_rate": 9.161393592007364e-06, "loss": 0.5673, "step": 2173 }, { "epoch": 0.2115815085158151, "grad_norm": 1.336076606512916, "learning_rate": 9.160519631921427e-06, "loss": 0.418, "step": 2174 }, { "epoch": 0.2116788321167883, "grad_norm": 1.5539773056945747, "learning_rate": 9.159645258402098e-06, "loss": 0.4417, "step": 2175 }, { "epoch": 0.21177615571776157, "grad_norm": 1.35099904216899, "learning_rate": 9.158770471536261e-06, "loss": 0.4389, "step": 2176 }, { "epoch": 0.2118734793187348, "grad_norm": 1.5960801985245197, "learning_rate": 9.157895271410848e-06, "loss": 0.4444, "step": 2177 }, { "epoch": 0.21197080291970802, "grad_norm": 1.343338393224711, "learning_rate": 9.157019658112825e-06, "loss": 0.3867, "step": 2178 }, { "epoch": 0.21206812652068127, "grad_norm": 1.573040163695098, "learning_rate": 9.156143631729205e-06, "loss": 0.5564, "step": 2179 }, { "epoch": 0.2121654501216545, "grad_norm": 1.477194998770335, "learning_rate": 9.155267192347037e-06, "loss": 0.5053, "step": 2180 }, { "epoch": 0.21226277372262772, "grad_norm": 1.4697445687746653, "learning_rate": 9.154390340053414e-06, "loss": 0.4462, "step": 2181 }, { "epoch": 0.21236009732360098, "grad_norm": 1.2383233673923462, "learning_rate": 9.15351307493547e-06, "loss": 0.4023, "step": 2182 }, { "epoch": 0.2124574209245742, "grad_norm": 1.73929160255024, "learning_rate": 9.152635397080377e-06, "loss": 0.456, "step": 2183 }, { "epoch": 0.21255474452554746, "grad_norm": 1.814215933055299, "learning_rate": 9.151757306575354e-06, "loss": 0.5283, "step": 2184 }, { "epoch": 0.21265206812652068, "grad_norm": 1.440140413882406, "learning_rate": 9.150878803507655e-06, "loss": 0.4754, "step": 2185 }, { "epoch": 0.2127493917274939, "grad_norm": 1.4991761170210094, "learning_rate": 9.149999887964577e-06, "loss": 0.4244, "step": 2186 }, { "epoch": 0.21284671532846716, "grad_norm": 1.6045542244692401, "learning_rate": 9.149120560033461e-06, "loss": 0.4149, "step": 2187 }, { "epoch": 0.2129440389294404, "grad_norm": 1.6999406355422166, "learning_rate": 9.148240819801684e-06, "loss": 0.7227, "step": 2188 }, { "epoch": 0.2130413625304136, "grad_norm": 1.5383336234101048, "learning_rate": 9.147360667356667e-06, "loss": 0.4102, "step": 2189 }, { "epoch": 0.21313868613138687, "grad_norm": 1.3100772476716567, "learning_rate": 9.146480102785871e-06, "loss": 0.4001, "step": 2190 }, { "epoch": 0.2132360097323601, "grad_norm": 1.2113504505529646, "learning_rate": 9.1455991261768e-06, "loss": 0.3906, "step": 2191 }, { "epoch": 0.21333333333333335, "grad_norm": 2.1524156395732996, "learning_rate": 9.144717737616994e-06, "loss": 0.3722, "step": 2192 }, { "epoch": 0.21343065693430657, "grad_norm": 1.3156410053212892, "learning_rate": 9.143835937194039e-06, "loss": 0.414, "step": 2193 }, { "epoch": 0.2135279805352798, "grad_norm": 1.382537469614808, "learning_rate": 9.14295372499556e-06, "loss": 0.3687, "step": 2194 }, { "epoch": 0.21362530413625305, "grad_norm": 1.4106617705657403, "learning_rate": 9.142071101109224e-06, "loss": 0.2515, "step": 2195 }, { "epoch": 0.21372262773722628, "grad_norm": 1.4292530170893925, "learning_rate": 9.141188065622736e-06, "loss": 0.4671, "step": 2196 }, { "epoch": 0.2138199513381995, "grad_norm": 1.371262803483025, "learning_rate": 9.140304618623844e-06, "loss": 0.4397, "step": 2197 }, { "epoch": 0.21391727493917276, "grad_norm": 1.3337172412513854, "learning_rate": 9.13942076020034e-06, "loss": 0.4518, "step": 2198 }, { "epoch": 0.21401459854014598, "grad_norm": 1.195478639712577, "learning_rate": 9.138536490440046e-06, "loss": 0.3236, "step": 2199 }, { "epoch": 0.2141119221411192, "grad_norm": 1.6207375008593756, "learning_rate": 9.13765180943084e-06, "loss": 0.5147, "step": 2200 }, { "epoch": 0.21420924574209246, "grad_norm": 1.457360033672521, "learning_rate": 9.136766717260631e-06, "loss": 0.3228, "step": 2201 }, { "epoch": 0.2143065693430657, "grad_norm": 1.2314544120773039, "learning_rate": 9.13588121401737e-06, "loss": 0.3413, "step": 2202 }, { "epoch": 0.21440389294403894, "grad_norm": 1.3614880154600904, "learning_rate": 9.13499529978905e-06, "loss": 0.3902, "step": 2203 }, { "epoch": 0.21450121654501217, "grad_norm": 1.3431981306372034, "learning_rate": 9.134108974663707e-06, "loss": 0.4893, "step": 2204 }, { "epoch": 0.2145985401459854, "grad_norm": 1.346114362934121, "learning_rate": 9.133222238729414e-06, "loss": 0.4195, "step": 2205 }, { "epoch": 0.21469586374695865, "grad_norm": 1.2405202461045035, "learning_rate": 9.132335092074285e-06, "loss": 0.4373, "step": 2206 }, { "epoch": 0.21479318734793187, "grad_norm": 1.2952176269832685, "learning_rate": 9.131447534786478e-06, "loss": 0.3253, "step": 2207 }, { "epoch": 0.2148905109489051, "grad_norm": 1.3497804127584312, "learning_rate": 9.130559566954191e-06, "loss": 0.4401, "step": 2208 }, { "epoch": 0.21498783454987835, "grad_norm": 1.6094605506454212, "learning_rate": 9.129671188665661e-06, "loss": 0.5943, "step": 2209 }, { "epoch": 0.21508515815085158, "grad_norm": 1.7393737788578179, "learning_rate": 9.128782400009167e-06, "loss": 0.6832, "step": 2210 }, { "epoch": 0.2151824817518248, "grad_norm": 1.2888456219960003, "learning_rate": 9.127893201073028e-06, "loss": 0.4449, "step": 2211 }, { "epoch": 0.21527980535279806, "grad_norm": 1.6231451452368957, "learning_rate": 9.127003591945605e-06, "loss": 0.6579, "step": 2212 }, { "epoch": 0.21537712895377129, "grad_norm": 1.4013330754504585, "learning_rate": 9.126113572715296e-06, "loss": 0.5072, "step": 2213 }, { "epoch": 0.21547445255474454, "grad_norm": 1.1928349667862592, "learning_rate": 9.125223143470547e-06, "loss": 0.2896, "step": 2214 }, { "epoch": 0.21557177615571776, "grad_norm": 1.3027255903002162, "learning_rate": 9.124332304299838e-06, "loss": 0.3076, "step": 2215 }, { "epoch": 0.215669099756691, "grad_norm": 1.6527022746103417, "learning_rate": 9.123441055291694e-06, "loss": 0.4688, "step": 2216 }, { "epoch": 0.21576642335766424, "grad_norm": 1.3197927862863625, "learning_rate": 9.122549396534676e-06, "loss": 0.318, "step": 2217 }, { "epoch": 0.21586374695863747, "grad_norm": 1.5297610770776902, "learning_rate": 9.121657328117392e-06, "loss": 0.6176, "step": 2218 }, { "epoch": 0.2159610705596107, "grad_norm": 1.338041823259507, "learning_rate": 9.120764850128486e-06, "loss": 0.3941, "step": 2219 }, { "epoch": 0.21605839416058395, "grad_norm": 1.200858421054794, "learning_rate": 9.119871962656644e-06, "loss": 0.3758, "step": 2220 }, { "epoch": 0.21615571776155718, "grad_norm": 1.5023816592412242, "learning_rate": 9.118978665790592e-06, "loss": 0.5032, "step": 2221 }, { "epoch": 0.2162530413625304, "grad_norm": 1.2258656459952086, "learning_rate": 9.118084959619099e-06, "loss": 0.4489, "step": 2222 }, { "epoch": 0.21635036496350366, "grad_norm": 1.717063075964899, "learning_rate": 9.117190844230971e-06, "loss": 0.7762, "step": 2223 }, { "epoch": 0.21644768856447688, "grad_norm": 1.210140433555958, "learning_rate": 9.11629631971506e-06, "loss": 0.4431, "step": 2224 }, { "epoch": 0.21654501216545013, "grad_norm": 1.4188251693910732, "learning_rate": 9.115401386160252e-06, "loss": 0.3495, "step": 2225 }, { "epoch": 0.21664233576642336, "grad_norm": 2.073136961715272, "learning_rate": 9.11450604365548e-06, "loss": 0.4268, "step": 2226 }, { "epoch": 0.2167396593673966, "grad_norm": 1.5265588328884594, "learning_rate": 9.113610292289714e-06, "loss": 0.4303, "step": 2227 }, { "epoch": 0.21683698296836984, "grad_norm": 1.3220401272995868, "learning_rate": 9.112714132151963e-06, "loss": 0.4221, "step": 2228 }, { "epoch": 0.21693430656934307, "grad_norm": 1.4088441022230214, "learning_rate": 9.111817563331282e-06, "loss": 0.1886, "step": 2229 }, { "epoch": 0.2170316301703163, "grad_norm": 1.3947572498286958, "learning_rate": 9.110920585916763e-06, "loss": 0.353, "step": 2230 }, { "epoch": 0.21712895377128955, "grad_norm": 1.2369368803593181, "learning_rate": 9.110023199997537e-06, "loss": 0.2576, "step": 2231 }, { "epoch": 0.21722627737226277, "grad_norm": 1.1860471672244592, "learning_rate": 9.10912540566278e-06, "loss": 0.3994, "step": 2232 }, { "epoch": 0.217323600973236, "grad_norm": 1.309576411449957, "learning_rate": 9.108227203001708e-06, "loss": 0.4453, "step": 2233 }, { "epoch": 0.21742092457420925, "grad_norm": 1.6554896930775824, "learning_rate": 9.10732859210357e-06, "loss": 0.589, "step": 2234 }, { "epoch": 0.21751824817518248, "grad_norm": 1.761859219992272, "learning_rate": 9.106429573057666e-06, "loss": 0.726, "step": 2235 }, { "epoch": 0.21761557177615573, "grad_norm": 1.35833156484165, "learning_rate": 9.105530145953335e-06, "loss": 0.4012, "step": 2236 }, { "epoch": 0.21771289537712896, "grad_norm": 3.4502529438559884, "learning_rate": 9.104630310879944e-06, "loss": 0.4621, "step": 2237 }, { "epoch": 0.21781021897810218, "grad_norm": 1.3357957463599541, "learning_rate": 9.103730067926922e-06, "loss": 0.317, "step": 2238 }, { "epoch": 0.21790754257907544, "grad_norm": 1.3566642568052916, "learning_rate": 9.102829417183716e-06, "loss": 0.4245, "step": 2239 }, { "epoch": 0.21800486618004866, "grad_norm": 1.673808040965782, "learning_rate": 9.10192835873983e-06, "loss": 0.6908, "step": 2240 }, { "epoch": 0.2181021897810219, "grad_norm": 1.8194308130790637, "learning_rate": 9.101026892684804e-06, "loss": 0.5157, "step": 2241 }, { "epoch": 0.21819951338199514, "grad_norm": 1.4443029228393756, "learning_rate": 9.100125019108214e-06, "loss": 0.5417, "step": 2242 }, { "epoch": 0.21829683698296837, "grad_norm": 1.4594341846039764, "learning_rate": 9.099222738099682e-06, "loss": 0.4297, "step": 2243 }, { "epoch": 0.2183941605839416, "grad_norm": 1.3121064822320374, "learning_rate": 9.098320049748864e-06, "loss": 0.4646, "step": 2244 }, { "epoch": 0.21849148418491485, "grad_norm": 1.5596348242175504, "learning_rate": 9.097416954145467e-06, "loss": 0.4877, "step": 2245 }, { "epoch": 0.21858880778588807, "grad_norm": 1.1835003302943965, "learning_rate": 9.096513451379225e-06, "loss": 0.3548, "step": 2246 }, { "epoch": 0.21868613138686133, "grad_norm": 1.4956699498169375, "learning_rate": 9.095609541539925e-06, "loss": 0.3958, "step": 2247 }, { "epoch": 0.21878345498783455, "grad_norm": 1.3761247023142853, "learning_rate": 9.094705224717388e-06, "loss": 0.4076, "step": 2248 }, { "epoch": 0.21888077858880778, "grad_norm": 1.2940624946938768, "learning_rate": 9.093800501001476e-06, "loss": 0.4989, "step": 2249 }, { "epoch": 0.21897810218978103, "grad_norm": 1.1389229499303237, "learning_rate": 9.092895370482091e-06, "loss": 0.332, "step": 2250 }, { "epoch": 0.21907542579075426, "grad_norm": 1.5338979130860617, "learning_rate": 9.091989833249179e-06, "loss": 0.5609, "step": 2251 }, { "epoch": 0.21917274939172748, "grad_norm": 1.3736786128370664, "learning_rate": 9.091083889392721e-06, "loss": 0.3767, "step": 2252 }, { "epoch": 0.21927007299270074, "grad_norm": 1.6001218689759074, "learning_rate": 9.090177539002743e-06, "loss": 0.5709, "step": 2253 }, { "epoch": 0.21936739659367396, "grad_norm": 1.2578364778685514, "learning_rate": 9.089270782169308e-06, "loss": 0.3796, "step": 2254 }, { "epoch": 0.2194647201946472, "grad_norm": 1.5508865589735865, "learning_rate": 9.088363618982523e-06, "loss": 0.5947, "step": 2255 }, { "epoch": 0.21956204379562044, "grad_norm": 1.2646857650137902, "learning_rate": 9.08745604953253e-06, "loss": 0.3024, "step": 2256 }, { "epoch": 0.21965936739659367, "grad_norm": 1.1168071392771144, "learning_rate": 9.08654807390952e-06, "loss": 0.3113, "step": 2257 }, { "epoch": 0.21975669099756692, "grad_norm": 1.238369237619726, "learning_rate": 9.085639692203713e-06, "loss": 0.2179, "step": 2258 }, { "epoch": 0.21985401459854015, "grad_norm": 1.2485790759653945, "learning_rate": 9.084730904505381e-06, "loss": 0.3763, "step": 2259 }, { "epoch": 0.21995133819951337, "grad_norm": 1.6082877032407055, "learning_rate": 9.083821710904827e-06, "loss": 0.3831, "step": 2260 }, { "epoch": 0.22004866180048663, "grad_norm": 1.3213256018887491, "learning_rate": 9.082912111492401e-06, "loss": 0.4091, "step": 2261 }, { "epoch": 0.22014598540145985, "grad_norm": 1.5899440724355371, "learning_rate": 9.08200210635849e-06, "loss": 0.4491, "step": 2262 }, { "epoch": 0.22024330900243308, "grad_norm": 1.30089465497526, "learning_rate": 9.081091695593518e-06, "loss": 0.3762, "step": 2263 }, { "epoch": 0.22034063260340633, "grad_norm": 1.5403984971127525, "learning_rate": 9.080180879287957e-06, "loss": 0.438, "step": 2264 }, { "epoch": 0.22043795620437956, "grad_norm": 1.5500984898931875, "learning_rate": 9.079269657532312e-06, "loss": 0.398, "step": 2265 }, { "epoch": 0.22053527980535279, "grad_norm": 1.4834461719298844, "learning_rate": 9.078358030417136e-06, "loss": 0.6175, "step": 2266 }, { "epoch": 0.22063260340632604, "grad_norm": 1.3553003212010182, "learning_rate": 9.077445998033015e-06, "loss": 0.2719, "step": 2267 }, { "epoch": 0.22072992700729926, "grad_norm": 1.573783871238475, "learning_rate": 9.07653356047058e-06, "loss": 0.2328, "step": 2268 }, { "epoch": 0.22082725060827252, "grad_norm": 1.54928316645126, "learning_rate": 9.075620717820498e-06, "loss": 0.3514, "step": 2269 }, { "epoch": 0.22092457420924574, "grad_norm": 1.3616253433976528, "learning_rate": 9.07470747017348e-06, "loss": 0.4636, "step": 2270 }, { "epoch": 0.22102189781021897, "grad_norm": 1.6741713680481711, "learning_rate": 9.073793817620277e-06, "loss": 0.6321, "step": 2271 }, { "epoch": 0.22111922141119222, "grad_norm": 1.3794305685281492, "learning_rate": 9.07287976025168e-06, "loss": 0.3172, "step": 2272 }, { "epoch": 0.22121654501216545, "grad_norm": 1.362894347632133, "learning_rate": 9.071965298158516e-06, "loss": 0.3989, "step": 2273 }, { "epoch": 0.22131386861313868, "grad_norm": 1.4233131262232992, "learning_rate": 9.071050431431658e-06, "loss": 0.4922, "step": 2274 }, { "epoch": 0.22141119221411193, "grad_norm": 1.4905332812995968, "learning_rate": 9.070135160162016e-06, "loss": 0.3952, "step": 2275 }, { "epoch": 0.22150851581508516, "grad_norm": 1.4389307945528345, "learning_rate": 9.069219484440541e-06, "loss": 0.4364, "step": 2276 }, { "epoch": 0.22160583941605838, "grad_norm": 1.4796907096594347, "learning_rate": 9.068303404358226e-06, "loss": 0.4842, "step": 2277 }, { "epoch": 0.22170316301703163, "grad_norm": 1.6561415294899449, "learning_rate": 9.0673869200061e-06, "loss": 0.5595, "step": 2278 }, { "epoch": 0.22180048661800486, "grad_norm": 1.4198474890784685, "learning_rate": 9.066470031475236e-06, "loss": 0.4762, "step": 2279 }, { "epoch": 0.22189781021897811, "grad_norm": 1.437724469115563, "learning_rate": 9.065552738856745e-06, "loss": 0.3687, "step": 2280 }, { "epoch": 0.22199513381995134, "grad_norm": 1.2431258010669888, "learning_rate": 9.06463504224178e-06, "loss": 0.3854, "step": 2281 }, { "epoch": 0.22209245742092457, "grad_norm": 1.362042407967867, "learning_rate": 9.063716941721534e-06, "loss": 0.3981, "step": 2282 }, { "epoch": 0.22218978102189782, "grad_norm": 1.3260780267557537, "learning_rate": 9.062798437387236e-06, "loss": 0.4304, "step": 2283 }, { "epoch": 0.22228710462287105, "grad_norm": 1.2009742636293355, "learning_rate": 9.06187952933016e-06, "loss": 0.3441, "step": 2284 }, { "epoch": 0.22238442822384427, "grad_norm": 1.7089934430562992, "learning_rate": 9.060960217641618e-06, "loss": 0.3488, "step": 2285 }, { "epoch": 0.22248175182481753, "grad_norm": 1.3539106224768682, "learning_rate": 9.060040502412965e-06, "loss": 0.3617, "step": 2286 }, { "epoch": 0.22257907542579075, "grad_norm": 1.3952537396094973, "learning_rate": 9.05912038373559e-06, "loss": 0.4507, "step": 2287 }, { "epoch": 0.22267639902676398, "grad_norm": 1.201207552744405, "learning_rate": 9.058199861700928e-06, "loss": 0.3074, "step": 2288 }, { "epoch": 0.22277372262773723, "grad_norm": 1.1918182161083974, "learning_rate": 9.057278936400453e-06, "loss": 0.3713, "step": 2289 }, { "epoch": 0.22287104622871046, "grad_norm": 1.5864015097741249, "learning_rate": 9.056357607925674e-06, "loss": 0.4651, "step": 2290 }, { "epoch": 0.2229683698296837, "grad_norm": 1.0855034708664277, "learning_rate": 9.055435876368148e-06, "loss": 0.2361, "step": 2291 }, { "epoch": 0.22306569343065694, "grad_norm": 1.1945153364440069, "learning_rate": 9.054513741819466e-06, "loss": 0.2803, "step": 2292 }, { "epoch": 0.22316301703163016, "grad_norm": 1.3734264039165323, "learning_rate": 9.053591204371262e-06, "loss": 0.3709, "step": 2293 }, { "epoch": 0.22326034063260342, "grad_norm": 1.662571628719731, "learning_rate": 9.052668264115206e-06, "loss": 0.6615, "step": 2294 }, { "epoch": 0.22335766423357664, "grad_norm": 1.4371203045482563, "learning_rate": 9.051744921143015e-06, "loss": 0.4082, "step": 2295 }, { "epoch": 0.22345498783454987, "grad_norm": 1.5571182647752952, "learning_rate": 9.050821175546442e-06, "loss": 0.5338, "step": 2296 }, { "epoch": 0.22355231143552312, "grad_norm": 1.4022335338581293, "learning_rate": 9.049897027417277e-06, "loss": 0.3933, "step": 2297 }, { "epoch": 0.22364963503649635, "grad_norm": 1.2815006290096387, "learning_rate": 9.048972476847356e-06, "loss": 0.4662, "step": 2298 }, { "epoch": 0.22374695863746957, "grad_norm": 1.4344706750679865, "learning_rate": 9.04804752392855e-06, "loss": 0.4422, "step": 2299 }, { "epoch": 0.22384428223844283, "grad_norm": 1.2984999163116793, "learning_rate": 9.047122168752775e-06, "loss": 0.3659, "step": 2300 }, { "epoch": 0.22394160583941605, "grad_norm": 1.1587669196843096, "learning_rate": 9.046196411411982e-06, "loss": 0.2974, "step": 2301 }, { "epoch": 0.2240389294403893, "grad_norm": 2.322228254141064, "learning_rate": 9.045270251998166e-06, "loss": 0.5667, "step": 2302 }, { "epoch": 0.22413625304136253, "grad_norm": 1.5137300738559605, "learning_rate": 9.044343690603358e-06, "loss": 0.3889, "step": 2303 }, { "epoch": 0.22423357664233576, "grad_norm": 1.472679239189759, "learning_rate": 9.04341672731963e-06, "loss": 0.4875, "step": 2304 }, { "epoch": 0.224330900243309, "grad_norm": 1.391957619608358, "learning_rate": 9.042489362239097e-06, "loss": 0.4513, "step": 2305 }, { "epoch": 0.22442822384428224, "grad_norm": 1.5752423841676473, "learning_rate": 9.041561595453914e-06, "loss": 0.6021, "step": 2306 }, { "epoch": 0.22452554744525546, "grad_norm": 1.340696458312585, "learning_rate": 9.040633427056268e-06, "loss": 0.36, "step": 2307 }, { "epoch": 0.22462287104622872, "grad_norm": 1.319309191993897, "learning_rate": 9.039704857138396e-06, "loss": 0.2632, "step": 2308 }, { "epoch": 0.22472019464720194, "grad_norm": 1.3567748798839634, "learning_rate": 9.03877588579257e-06, "loss": 0.4085, "step": 2309 }, { "epoch": 0.22481751824817517, "grad_norm": 1.7234931003044007, "learning_rate": 9.0378465131111e-06, "loss": 0.5366, "step": 2310 }, { "epoch": 0.22491484184914842, "grad_norm": 1.3431964443797024, "learning_rate": 9.036916739186341e-06, "loss": 0.3406, "step": 2311 }, { "epoch": 0.22501216545012165, "grad_norm": 1.6143507102825565, "learning_rate": 9.035986564110685e-06, "loss": 0.6322, "step": 2312 }, { "epoch": 0.2251094890510949, "grad_norm": 1.421713348254314, "learning_rate": 9.035055987976563e-06, "loss": 0.3963, "step": 2313 }, { "epoch": 0.22520681265206813, "grad_norm": 1.5860325075452377, "learning_rate": 9.034125010876447e-06, "loss": 0.4722, "step": 2314 }, { "epoch": 0.22530413625304135, "grad_norm": 1.633700480684755, "learning_rate": 9.03319363290285e-06, "loss": 0.2649, "step": 2315 }, { "epoch": 0.2254014598540146, "grad_norm": 1.5598775600409591, "learning_rate": 9.03226185414832e-06, "loss": 0.4778, "step": 2316 }, { "epoch": 0.22549878345498783, "grad_norm": 1.4413798673536165, "learning_rate": 9.031329674705455e-06, "loss": 0.3182, "step": 2317 }, { "epoch": 0.22559610705596106, "grad_norm": 1.437989358950148, "learning_rate": 9.03039709466688e-06, "loss": 0.4297, "step": 2318 }, { "epoch": 0.2256934306569343, "grad_norm": 1.3355568683760275, "learning_rate": 9.029464114125267e-06, "loss": 0.3393, "step": 2319 }, { "epoch": 0.22579075425790754, "grad_norm": 1.353161962413978, "learning_rate": 9.028530733173332e-06, "loss": 0.3362, "step": 2320 }, { "epoch": 0.22588807785888076, "grad_norm": 1.1699742479017108, "learning_rate": 9.027596951903819e-06, "loss": 0.3674, "step": 2321 }, { "epoch": 0.22598540145985402, "grad_norm": 1.1235278882417843, "learning_rate": 9.026662770409524e-06, "loss": 0.3209, "step": 2322 }, { "epoch": 0.22608272506082724, "grad_norm": 1.4951135995374567, "learning_rate": 9.025728188783273e-06, "loss": 0.4297, "step": 2323 }, { "epoch": 0.2261800486618005, "grad_norm": 1.3046514997255336, "learning_rate": 9.024793207117937e-06, "loss": 0.3765, "step": 2324 }, { "epoch": 0.22627737226277372, "grad_norm": 1.3346554142143854, "learning_rate": 9.023857825506426e-06, "loss": 0.5228, "step": 2325 }, { "epoch": 0.22637469586374695, "grad_norm": 1.4309619163867682, "learning_rate": 9.022922044041691e-06, "loss": 0.4605, "step": 2326 }, { "epoch": 0.2264720194647202, "grad_norm": 1.5152634651556307, "learning_rate": 9.021985862816718e-06, "loss": 0.5553, "step": 2327 }, { "epoch": 0.22656934306569343, "grad_norm": 1.3885182055556289, "learning_rate": 9.02104928192454e-06, "loss": 0.4831, "step": 2328 }, { "epoch": 0.22666666666666666, "grad_norm": 1.2729317064328092, "learning_rate": 9.020112301458221e-06, "loss": 0.4314, "step": 2329 }, { "epoch": 0.2267639902676399, "grad_norm": 0.9679503678492228, "learning_rate": 9.019174921510874e-06, "loss": 0.1925, "step": 2330 }, { "epoch": 0.22686131386861313, "grad_norm": 1.4513146393120597, "learning_rate": 9.018237142175643e-06, "loss": 0.5487, "step": 2331 }, { "epoch": 0.2269586374695864, "grad_norm": 1.5377065039176208, "learning_rate": 9.017298963545718e-06, "loss": 0.4063, "step": 2332 }, { "epoch": 0.22705596107055961, "grad_norm": 1.0180180453516632, "learning_rate": 9.016360385714324e-06, "loss": 0.2101, "step": 2333 }, { "epoch": 0.22715328467153284, "grad_norm": 1.3145676629552665, "learning_rate": 9.015421408774732e-06, "loss": 0.4575, "step": 2334 }, { "epoch": 0.2272506082725061, "grad_norm": 1.3213351651174845, "learning_rate": 9.014482032820247e-06, "loss": 0.3924, "step": 2335 }, { "epoch": 0.22734793187347932, "grad_norm": 1.9370834148842127, "learning_rate": 9.013542257944212e-06, "loss": 0.4332, "step": 2336 }, { "epoch": 0.22744525547445255, "grad_norm": 1.4754695985325648, "learning_rate": 9.012602084240018e-06, "loss": 0.4014, "step": 2337 }, { "epoch": 0.2275425790754258, "grad_norm": 1.1124893316550342, "learning_rate": 9.011661511801088e-06, "loss": 0.2957, "step": 2338 }, { "epoch": 0.22763990267639903, "grad_norm": 1.2537185195667433, "learning_rate": 9.010720540720888e-06, "loss": 0.3004, "step": 2339 }, { "epoch": 0.22773722627737225, "grad_norm": 1.4597689601256807, "learning_rate": 9.009779171092923e-06, "loss": 0.2555, "step": 2340 }, { "epoch": 0.2278345498783455, "grad_norm": 1.4737791439989423, "learning_rate": 9.008837403010736e-06, "loss": 0.5355, "step": 2341 }, { "epoch": 0.22793187347931873, "grad_norm": 1.3795639069131398, "learning_rate": 9.007895236567913e-06, "loss": 0.3961, "step": 2342 }, { "epoch": 0.22802919708029198, "grad_norm": 1.6364796903185053, "learning_rate": 9.006952671858078e-06, "loss": 0.444, "step": 2343 }, { "epoch": 0.2281265206812652, "grad_norm": 1.1964346909925698, "learning_rate": 9.006009708974892e-06, "loss": 0.3297, "step": 2344 }, { "epoch": 0.22822384428223844, "grad_norm": 1.343808771666808, "learning_rate": 9.00506634801206e-06, "loss": 0.4537, "step": 2345 }, { "epoch": 0.2283211678832117, "grad_norm": 1.4003110727355261, "learning_rate": 9.004122589063323e-06, "loss": 0.3883, "step": 2346 }, { "epoch": 0.22841849148418492, "grad_norm": 1.2435101838594087, "learning_rate": 9.003178432222462e-06, "loss": 0.4238, "step": 2347 }, { "epoch": 0.22851581508515814, "grad_norm": 1.324643227390155, "learning_rate": 9.0022338775833e-06, "loss": 0.4139, "step": 2348 }, { "epoch": 0.2286131386861314, "grad_norm": 1.7692069120616638, "learning_rate": 9.001288925239698e-06, "loss": 0.4719, "step": 2349 }, { "epoch": 0.22871046228710462, "grad_norm": 1.223562422765287, "learning_rate": 9.000343575285555e-06, "loss": 0.3256, "step": 2350 }, { "epoch": 0.22880778588807785, "grad_norm": 1.3407025045830592, "learning_rate": 8.999397827814812e-06, "loss": 0.3788, "step": 2351 }, { "epoch": 0.2289051094890511, "grad_norm": 1.5281139100341292, "learning_rate": 8.99845168292145e-06, "loss": 0.5565, "step": 2352 }, { "epoch": 0.22900243309002433, "grad_norm": 1.560155712083658, "learning_rate": 8.997505140699488e-06, "loss": 0.4957, "step": 2353 }, { "epoch": 0.22909975669099758, "grad_norm": 1.290422773797366, "learning_rate": 8.996558201242981e-06, "loss": 0.4011, "step": 2354 }, { "epoch": 0.2291970802919708, "grad_norm": 1.2847680894150124, "learning_rate": 8.99561086464603e-06, "loss": 0.4419, "step": 2355 }, { "epoch": 0.22929440389294403, "grad_norm": 1.4625413220428547, "learning_rate": 8.99466313100277e-06, "loss": 0.2511, "step": 2356 }, { "epoch": 0.22939172749391729, "grad_norm": 1.2882840667194135, "learning_rate": 8.99371500040738e-06, "loss": 0.3992, "step": 2357 }, { "epoch": 0.2294890510948905, "grad_norm": 1.1997126453782205, "learning_rate": 8.992766472954077e-06, "loss": 0.2639, "step": 2358 }, { "epoch": 0.22958637469586374, "grad_norm": 1.6688893120724655, "learning_rate": 8.991817548737114e-06, "loss": 0.3103, "step": 2359 }, { "epoch": 0.229683698296837, "grad_norm": 1.4031771252981649, "learning_rate": 8.990868227850788e-06, "loss": 0.4245, "step": 2360 }, { "epoch": 0.22978102189781022, "grad_norm": 1.4825462721346627, "learning_rate": 8.989918510389432e-06, "loss": 0.3973, "step": 2361 }, { "epoch": 0.22987834549878344, "grad_norm": 1.7756990641125774, "learning_rate": 8.988968396447424e-06, "loss": 0.6091, "step": 2362 }, { "epoch": 0.2299756690997567, "grad_norm": 1.5519381803018173, "learning_rate": 8.988017886119172e-06, "loss": 0.5849, "step": 2363 }, { "epoch": 0.23007299270072992, "grad_norm": 1.5288537407748173, "learning_rate": 8.987066979499133e-06, "loss": 0.594, "step": 2364 }, { "epoch": 0.23017031630170318, "grad_norm": 1.2519254160654887, "learning_rate": 8.986115676681797e-06, "loss": 0.3781, "step": 2365 }, { "epoch": 0.2302676399026764, "grad_norm": 1.2118409754918265, "learning_rate": 8.985163977761697e-06, "loss": 0.3761, "step": 2366 }, { "epoch": 0.23036496350364963, "grad_norm": 1.3123505825187787, "learning_rate": 8.984211882833402e-06, "loss": 0.405, "step": 2367 }, { "epoch": 0.23046228710462288, "grad_norm": 1.6027642184293107, "learning_rate": 8.983259391991524e-06, "loss": 0.597, "step": 2368 }, { "epoch": 0.2305596107055961, "grad_norm": 1.3646497443348367, "learning_rate": 8.982306505330712e-06, "loss": 0.4036, "step": 2369 }, { "epoch": 0.23065693430656933, "grad_norm": 1.2894115553392402, "learning_rate": 8.981353222945653e-06, "loss": 0.2778, "step": 2370 }, { "epoch": 0.2307542579075426, "grad_norm": 1.27883786418869, "learning_rate": 8.98039954493108e-06, "loss": 0.3803, "step": 2371 }, { "epoch": 0.2308515815085158, "grad_norm": 1.5863647637061415, "learning_rate": 8.979445471381755e-06, "loss": 0.4716, "step": 2372 }, { "epoch": 0.23094890510948904, "grad_norm": 1.1874137646332688, "learning_rate": 8.97849100239249e-06, "loss": 0.2846, "step": 2373 }, { "epoch": 0.2310462287104623, "grad_norm": 1.6936318641369774, "learning_rate": 8.977536138058126e-06, "loss": 0.3418, "step": 2374 }, { "epoch": 0.23114355231143552, "grad_norm": 1.0526167143851337, "learning_rate": 8.976580878473553e-06, "loss": 0.258, "step": 2375 }, { "epoch": 0.23124087591240877, "grad_norm": 1.753799809070063, "learning_rate": 8.975625223733693e-06, "loss": 0.4764, "step": 2376 }, { "epoch": 0.231338199513382, "grad_norm": 1.3814482775299988, "learning_rate": 8.97466917393351e-06, "loss": 0.3811, "step": 2377 }, { "epoch": 0.23143552311435522, "grad_norm": 1.575424754678499, "learning_rate": 8.97371272916801e-06, "loss": 0.5028, "step": 2378 }, { "epoch": 0.23153284671532848, "grad_norm": 1.5163540217481704, "learning_rate": 8.972755889532234e-06, "loss": 0.4055, "step": 2379 }, { "epoch": 0.2316301703163017, "grad_norm": 1.1877796947964157, "learning_rate": 8.971798655121264e-06, "loss": 0.2978, "step": 2380 }, { "epoch": 0.23172749391727493, "grad_norm": 1.6274909221671408, "learning_rate": 8.970841026030218e-06, "loss": 0.4319, "step": 2381 }, { "epoch": 0.23182481751824818, "grad_norm": 1.413480143472021, "learning_rate": 8.969883002354259e-06, "loss": 0.4015, "step": 2382 }, { "epoch": 0.2319221411192214, "grad_norm": 1.451327617189514, "learning_rate": 8.968924584188587e-06, "loss": 0.5107, "step": 2383 }, { "epoch": 0.23201946472019463, "grad_norm": 1.4288160659587352, "learning_rate": 8.96796577162844e-06, "loss": 0.369, "step": 2384 }, { "epoch": 0.2321167883211679, "grad_norm": 1.6469132304956866, "learning_rate": 8.967006564769094e-06, "loss": 0.5982, "step": 2385 }, { "epoch": 0.23221411192214111, "grad_norm": 1.4887239693800984, "learning_rate": 8.966046963705869e-06, "loss": 0.4967, "step": 2386 }, { "epoch": 0.23231143552311437, "grad_norm": 1.2469481884120308, "learning_rate": 8.965086968534116e-06, "loss": 0.4022, "step": 2387 }, { "epoch": 0.2324087591240876, "grad_norm": 2.6320603198934047, "learning_rate": 8.964126579349237e-06, "loss": 0.2489, "step": 2388 }, { "epoch": 0.23250608272506082, "grad_norm": 1.2339093742509784, "learning_rate": 8.963165796246663e-06, "loss": 0.3694, "step": 2389 }, { "epoch": 0.23260340632603407, "grad_norm": 1.4634162966788549, "learning_rate": 8.962204619321866e-06, "loss": 0.5646, "step": 2390 }, { "epoch": 0.2327007299270073, "grad_norm": 1.2919651066139786, "learning_rate": 8.961243048670363e-06, "loss": 0.3833, "step": 2391 }, { "epoch": 0.23279805352798053, "grad_norm": 1.5273773111622013, "learning_rate": 8.960281084387701e-06, "loss": 0.5724, "step": 2392 }, { "epoch": 0.23289537712895378, "grad_norm": 1.4704498843019616, "learning_rate": 8.959318726569475e-06, "loss": 0.5232, "step": 2393 }, { "epoch": 0.232992700729927, "grad_norm": 1.52947786509823, "learning_rate": 8.958355975311314e-06, "loss": 0.5014, "step": 2394 }, { "epoch": 0.23309002433090023, "grad_norm": 1.457234959002331, "learning_rate": 8.957392830708886e-06, "loss": 0.5401, "step": 2395 }, { "epoch": 0.23318734793187348, "grad_norm": 1.5878948291380384, "learning_rate": 8.9564292928579e-06, "loss": 0.4481, "step": 2396 }, { "epoch": 0.2332846715328467, "grad_norm": 1.3353181262068508, "learning_rate": 8.955465361854103e-06, "loss": 0.3668, "step": 2397 }, { "epoch": 0.23338199513381996, "grad_norm": 2.023729457927684, "learning_rate": 8.954501037793282e-06, "loss": 0.256, "step": 2398 }, { "epoch": 0.2334793187347932, "grad_norm": 1.3501136378744423, "learning_rate": 8.953536320771264e-06, "loss": 0.4288, "step": 2399 }, { "epoch": 0.23357664233576642, "grad_norm": 0.9695156209886321, "learning_rate": 8.95257121088391e-06, "loss": 0.3313, "step": 2400 }, { "epoch": 0.23367396593673967, "grad_norm": 1.6268089203048999, "learning_rate": 8.951605708227125e-06, "loss": 0.5031, "step": 2401 }, { "epoch": 0.2337712895377129, "grad_norm": 1.3327356528771297, "learning_rate": 8.950639812896852e-06, "loss": 0.352, "step": 2402 }, { "epoch": 0.23386861313868612, "grad_norm": 1.646158604731562, "learning_rate": 8.949673524989074e-06, "loss": 0.6143, "step": 2403 }, { "epoch": 0.23396593673965937, "grad_norm": 1.4459398712277267, "learning_rate": 8.948706844599809e-06, "loss": 0.301, "step": 2404 }, { "epoch": 0.2340632603406326, "grad_norm": 1.242464142709881, "learning_rate": 8.947739771825118e-06, "loss": 0.3867, "step": 2405 }, { "epoch": 0.23416058394160583, "grad_norm": 1.283369590610404, "learning_rate": 8.946772306761099e-06, "loss": 0.3396, "step": 2406 }, { "epoch": 0.23425790754257908, "grad_norm": 1.659051576981879, "learning_rate": 8.94580444950389e-06, "loss": 0.2985, "step": 2407 }, { "epoch": 0.2343552311435523, "grad_norm": 1.5811557183787177, "learning_rate": 8.944836200149669e-06, "loss": 0.5412, "step": 2408 }, { "epoch": 0.23445255474452556, "grad_norm": 1.5937284580345608, "learning_rate": 8.943867558794648e-06, "loss": 0.4562, "step": 2409 }, { "epoch": 0.23454987834549879, "grad_norm": 1.179539450140548, "learning_rate": 8.942898525535085e-06, "loss": 0.2436, "step": 2410 }, { "epoch": 0.234647201946472, "grad_norm": 1.2115140465312926, "learning_rate": 8.941929100467272e-06, "loss": 0.325, "step": 2411 }, { "epoch": 0.23474452554744527, "grad_norm": 1.3228525862104779, "learning_rate": 8.94095928368754e-06, "loss": 0.4001, "step": 2412 }, { "epoch": 0.2348418491484185, "grad_norm": 1.5093562470528878, "learning_rate": 8.939989075292263e-06, "loss": 0.3554, "step": 2413 }, { "epoch": 0.23493917274939172, "grad_norm": 1.629660086616085, "learning_rate": 8.93901847537785e-06, "loss": 0.6349, "step": 2414 }, { "epoch": 0.23503649635036497, "grad_norm": 1.0826348229158524, "learning_rate": 8.938047484040749e-06, "loss": 0.2681, "step": 2415 }, { "epoch": 0.2351338199513382, "grad_norm": 1.2841520241198179, "learning_rate": 8.93707610137745e-06, "loss": 0.4081, "step": 2416 }, { "epoch": 0.23523114355231142, "grad_norm": 1.913465881785096, "learning_rate": 8.936104327484479e-06, "loss": 0.7043, "step": 2417 }, { "epoch": 0.23532846715328468, "grad_norm": 1.386306701477425, "learning_rate": 8.935132162458401e-06, "loss": 0.341, "step": 2418 }, { "epoch": 0.2354257907542579, "grad_norm": 1.1278547518059516, "learning_rate": 8.934159606395821e-06, "loss": 0.3151, "step": 2419 }, { "epoch": 0.23552311435523116, "grad_norm": 1.5540265542588236, "learning_rate": 8.933186659393384e-06, "loss": 0.6514, "step": 2420 }, { "epoch": 0.23562043795620438, "grad_norm": 1.278787339635804, "learning_rate": 8.932213321547769e-06, "loss": 0.3423, "step": 2421 }, { "epoch": 0.2357177615571776, "grad_norm": 1.2885094583361822, "learning_rate": 8.931239592955701e-06, "loss": 0.2958, "step": 2422 }, { "epoch": 0.23581508515815086, "grad_norm": 1.5181901598500283, "learning_rate": 8.930265473713939e-06, "loss": 0.4212, "step": 2423 }, { "epoch": 0.2359124087591241, "grad_norm": 1.2136160482551297, "learning_rate": 8.92929096391928e-06, "loss": 0.3982, "step": 2424 }, { "epoch": 0.2360097323600973, "grad_norm": 1.5487072814518004, "learning_rate": 8.928316063668562e-06, "loss": 0.5676, "step": 2425 }, { "epoch": 0.23610705596107057, "grad_norm": 1.430432818475582, "learning_rate": 8.927340773058664e-06, "loss": 0.4735, "step": 2426 }, { "epoch": 0.2362043795620438, "grad_norm": 1.4586841524588252, "learning_rate": 8.926365092186498e-06, "loss": 0.5637, "step": 2427 }, { "epoch": 0.23630170316301702, "grad_norm": 1.5364014523424565, "learning_rate": 8.92538902114902e-06, "loss": 0.4783, "step": 2428 }, { "epoch": 0.23639902676399027, "grad_norm": 1.3896600182614345, "learning_rate": 8.924412560043223e-06, "loss": 0.3748, "step": 2429 }, { "epoch": 0.2364963503649635, "grad_norm": 1.304447540327908, "learning_rate": 8.923435708966135e-06, "loss": 0.3373, "step": 2430 }, { "epoch": 0.23659367396593675, "grad_norm": 1.3383082719469825, "learning_rate": 8.922458468014833e-06, "loss": 0.3089, "step": 2431 }, { "epoch": 0.23669099756690998, "grad_norm": 1.4376693294142868, "learning_rate": 8.921480837286418e-06, "loss": 0.2665, "step": 2432 }, { "epoch": 0.2367883211678832, "grad_norm": 1.3948368197200884, "learning_rate": 8.920502816878045e-06, "loss": 0.4349, "step": 2433 }, { "epoch": 0.23688564476885646, "grad_norm": 1.5583938814663865, "learning_rate": 8.919524406886897e-06, "loss": 0.4528, "step": 2434 }, { "epoch": 0.23698296836982968, "grad_norm": 1.455016515054737, "learning_rate": 8.918545607410199e-06, "loss": 0.416, "step": 2435 }, { "epoch": 0.2370802919708029, "grad_norm": 1.5707414335423742, "learning_rate": 8.917566418545215e-06, "loss": 0.4269, "step": 2436 }, { "epoch": 0.23717761557177616, "grad_norm": 1.6214497738286784, "learning_rate": 8.916586840389248e-06, "loss": 0.5531, "step": 2437 }, { "epoch": 0.2372749391727494, "grad_norm": 1.5231468510302828, "learning_rate": 8.91560687303964e-06, "loss": 0.5464, "step": 2438 }, { "epoch": 0.23737226277372261, "grad_norm": 1.5631657517225734, "learning_rate": 8.91462651659377e-06, "loss": 0.4098, "step": 2439 }, { "epoch": 0.23746958637469587, "grad_norm": 1.5003582208774642, "learning_rate": 8.913645771149058e-06, "loss": 0.342, "step": 2440 }, { "epoch": 0.2375669099756691, "grad_norm": 1.2703591332316027, "learning_rate": 8.91266463680296e-06, "loss": 0.3195, "step": 2441 }, { "epoch": 0.23766423357664235, "grad_norm": 1.3910967851640175, "learning_rate": 8.91168311365297e-06, "loss": 0.334, "step": 2442 }, { "epoch": 0.23776155717761557, "grad_norm": 1.5001053773105038, "learning_rate": 8.910701201796625e-06, "loss": 0.4665, "step": 2443 }, { "epoch": 0.2378588807785888, "grad_norm": 1.6142849143926903, "learning_rate": 8.9097189013315e-06, "loss": 0.5276, "step": 2444 }, { "epoch": 0.23795620437956205, "grad_norm": 1.2059866820401877, "learning_rate": 8.908736212355202e-06, "loss": 0.2936, "step": 2445 }, { "epoch": 0.23805352798053528, "grad_norm": 1.4496663268694052, "learning_rate": 8.907753134965387e-06, "loss": 0.475, "step": 2446 }, { "epoch": 0.2381508515815085, "grad_norm": 1.4184456855989886, "learning_rate": 8.90676966925974e-06, "loss": 0.4477, "step": 2447 }, { "epoch": 0.23824817518248176, "grad_norm": 1.7126804340284862, "learning_rate": 8.90578581533599e-06, "loss": 0.6392, "step": 2448 }, { "epoch": 0.23834549878345498, "grad_norm": 1.6085356958926766, "learning_rate": 8.904801573291901e-06, "loss": 0.4428, "step": 2449 }, { "epoch": 0.2384428223844282, "grad_norm": 1.1724096477321129, "learning_rate": 8.903816943225281e-06, "loss": 0.23, "step": 2450 }, { "epoch": 0.23854014598540146, "grad_norm": 1.2849397331978023, "learning_rate": 8.902831925233972e-06, "loss": 0.4315, "step": 2451 }, { "epoch": 0.2386374695863747, "grad_norm": 1.3479724015628292, "learning_rate": 8.901846519415856e-06, "loss": 0.4528, "step": 2452 }, { "epoch": 0.23873479318734794, "grad_norm": 1.5241447958707557, "learning_rate": 8.900860725868852e-06, "loss": 0.5638, "step": 2453 }, { "epoch": 0.23883211678832117, "grad_norm": 1.3008951589753057, "learning_rate": 8.899874544690921e-06, "loss": 0.4364, "step": 2454 }, { "epoch": 0.2389294403892944, "grad_norm": 1.3889516127516133, "learning_rate": 8.89888797598006e-06, "loss": 0.5968, "step": 2455 }, { "epoch": 0.23902676399026765, "grad_norm": 1.3382384356293548, "learning_rate": 8.8979010198343e-06, "loss": 0.3423, "step": 2456 }, { "epoch": 0.23912408759124087, "grad_norm": 1.3927455122024084, "learning_rate": 8.896913676351726e-06, "loss": 0.5291, "step": 2457 }, { "epoch": 0.2392214111922141, "grad_norm": 1.3654704619725508, "learning_rate": 8.895925945630441e-06, "loss": 0.3224, "step": 2458 }, { "epoch": 0.23931873479318735, "grad_norm": 2.420859240745107, "learning_rate": 8.8949378277686e-06, "loss": 0.4526, "step": 2459 }, { "epoch": 0.23941605839416058, "grad_norm": 1.279171164356654, "learning_rate": 8.893949322864394e-06, "loss": 0.3452, "step": 2460 }, { "epoch": 0.2395133819951338, "grad_norm": 1.4336845514712926, "learning_rate": 8.89296043101605e-06, "loss": 0.3891, "step": 2461 }, { "epoch": 0.23961070559610706, "grad_norm": 1.2391493008048138, "learning_rate": 8.891971152321836e-06, "loss": 0.5135, "step": 2462 }, { "epoch": 0.23970802919708029, "grad_norm": 1.2398633987802397, "learning_rate": 8.890981486880057e-06, "loss": 0.2688, "step": 2463 }, { "epoch": 0.23980535279805354, "grad_norm": 1.1975725536626207, "learning_rate": 8.889991434789054e-06, "loss": 0.4181, "step": 2464 }, { "epoch": 0.23990267639902677, "grad_norm": 1.5121790458693565, "learning_rate": 8.889000996147213e-06, "loss": 0.667, "step": 2465 }, { "epoch": 0.24, "grad_norm": 1.2980809407294283, "learning_rate": 8.888010171052951e-06, "loss": 0.4025, "step": 2466 }, { "epoch": 0.24009732360097324, "grad_norm": 1.3683247659037883, "learning_rate": 8.887018959604731e-06, "loss": 0.4195, "step": 2467 }, { "epoch": 0.24019464720194647, "grad_norm": 1.6392091000056277, "learning_rate": 8.886027361901045e-06, "loss": 0.4464, "step": 2468 }, { "epoch": 0.2402919708029197, "grad_norm": 1.4286158146093557, "learning_rate": 8.885035378040435e-06, "loss": 0.503, "step": 2469 }, { "epoch": 0.24038929440389295, "grad_norm": 1.6249203295617591, "learning_rate": 8.884043008121468e-06, "loss": 0.5875, "step": 2470 }, { "epoch": 0.24048661800486618, "grad_norm": 1.316531393288964, "learning_rate": 8.883050252242762e-06, "loss": 0.3225, "step": 2471 }, { "epoch": 0.24058394160583943, "grad_norm": 1.3738066957140371, "learning_rate": 8.882057110502964e-06, "loss": 0.3863, "step": 2472 }, { "epoch": 0.24068126520681266, "grad_norm": 1.6149562610100578, "learning_rate": 8.881063583000766e-06, "loss": 0.6899, "step": 2473 }, { "epoch": 0.24077858880778588, "grad_norm": 1.1978996054498634, "learning_rate": 8.880069669834895e-06, "loss": 0.4647, "step": 2474 }, { "epoch": 0.24087591240875914, "grad_norm": 1.3737195294986575, "learning_rate": 8.879075371104114e-06, "loss": 0.3404, "step": 2475 }, { "epoch": 0.24097323600973236, "grad_norm": 1.3242090275500389, "learning_rate": 8.878080686907231e-06, "loss": 0.4923, "step": 2476 }, { "epoch": 0.2410705596107056, "grad_norm": 1.295191211796917, "learning_rate": 8.877085617343085e-06, "loss": 0.4449, "step": 2477 }, { "epoch": 0.24116788321167884, "grad_norm": 1.5068542914468723, "learning_rate": 8.87609016251056e-06, "loss": 0.5506, "step": 2478 }, { "epoch": 0.24126520681265207, "grad_norm": 1.650040845654398, "learning_rate": 8.87509432250857e-06, "loss": 0.5715, "step": 2479 }, { "epoch": 0.2413625304136253, "grad_norm": 1.5289429392674028, "learning_rate": 8.874098097436078e-06, "loss": 0.5626, "step": 2480 }, { "epoch": 0.24145985401459855, "grad_norm": 1.3609358059405043, "learning_rate": 8.873101487392078e-06, "loss": 0.4096, "step": 2481 }, { "epoch": 0.24155717761557177, "grad_norm": 1.5725676631470524, "learning_rate": 8.8721044924756e-06, "loss": 0.6597, "step": 2482 }, { "epoch": 0.24165450121654503, "grad_norm": 1.094002939677081, "learning_rate": 8.87110711278572e-06, "loss": 0.3206, "step": 2483 }, { "epoch": 0.24175182481751825, "grad_norm": 1.4551979783640236, "learning_rate": 8.870109348421544e-06, "loss": 0.445, "step": 2484 }, { "epoch": 0.24184914841849148, "grad_norm": 1.534219781362636, "learning_rate": 8.869111199482227e-06, "loss": 0.6666, "step": 2485 }, { "epoch": 0.24194647201946473, "grad_norm": 0.9530847884904149, "learning_rate": 8.86811266606695e-06, "loss": 0.2756, "step": 2486 }, { "epoch": 0.24204379562043796, "grad_norm": 1.4859819247146357, "learning_rate": 8.86711374827494e-06, "loss": 0.4626, "step": 2487 }, { "epoch": 0.24214111922141118, "grad_norm": 1.5336983239407425, "learning_rate": 8.86611444620546e-06, "loss": 0.5383, "step": 2488 }, { "epoch": 0.24223844282238444, "grad_norm": 1.4073640437212571, "learning_rate": 8.865114759957812e-06, "loss": 0.4675, "step": 2489 }, { "epoch": 0.24233576642335766, "grad_norm": 1.562895534043348, "learning_rate": 8.864114689631334e-06, "loss": 0.5641, "step": 2490 }, { "epoch": 0.2424330900243309, "grad_norm": 1.643145414496213, "learning_rate": 8.863114235325405e-06, "loss": 0.5749, "step": 2491 }, { "epoch": 0.24253041362530414, "grad_norm": 1.226721686463078, "learning_rate": 8.862113397139437e-06, "loss": 0.3432, "step": 2492 }, { "epoch": 0.24262773722627737, "grad_norm": 1.2699959241996903, "learning_rate": 8.86111217517289e-06, "loss": 0.4203, "step": 2493 }, { "epoch": 0.24272506082725062, "grad_norm": 1.4233705808484327, "learning_rate": 8.860110569525253e-06, "loss": 0.2601, "step": 2494 }, { "epoch": 0.24282238442822385, "grad_norm": 1.3784035260656315, "learning_rate": 8.859108580296055e-06, "loss": 0.4973, "step": 2495 }, { "epoch": 0.24291970802919707, "grad_norm": 1.2790024746236357, "learning_rate": 8.858106207584864e-06, "loss": 0.4067, "step": 2496 }, { "epoch": 0.24301703163017033, "grad_norm": 1.4041054798155945, "learning_rate": 8.857103451491292e-06, "loss": 0.5228, "step": 2497 }, { "epoch": 0.24311435523114355, "grad_norm": 1.6788565066048042, "learning_rate": 8.856100312114975e-06, "loss": 0.7133, "step": 2498 }, { "epoch": 0.24321167883211678, "grad_norm": 1.2024623978380433, "learning_rate": 8.855096789555602e-06, "loss": 0.2507, "step": 2499 }, { "epoch": 0.24330900243309003, "grad_norm": 1.3828470689148782, "learning_rate": 8.85409288391289e-06, "loss": 0.3993, "step": 2500 }, { "epoch": 0.24340632603406326, "grad_norm": 1.427484284296059, "learning_rate": 8.8530885952866e-06, "loss": 0.3926, "step": 2501 }, { "epoch": 0.24350364963503648, "grad_norm": 1.3193446567792235, "learning_rate": 8.852083923776529e-06, "loss": 0.2152, "step": 2502 }, { "epoch": 0.24360097323600974, "grad_norm": 1.3297823718570532, "learning_rate": 8.851078869482509e-06, "loss": 0.4772, "step": 2503 }, { "epoch": 0.24369829683698296, "grad_norm": 1.3044660003313646, "learning_rate": 8.850073432504416e-06, "loss": 0.3589, "step": 2504 }, { "epoch": 0.24379562043795622, "grad_norm": 1.4488096250914715, "learning_rate": 8.84906761294216e-06, "loss": 0.3261, "step": 2505 }, { "epoch": 0.24389294403892944, "grad_norm": 1.2778329641523152, "learning_rate": 8.848061410895687e-06, "loss": 0.3047, "step": 2506 }, { "epoch": 0.24399026763990267, "grad_norm": 1.135638375757245, "learning_rate": 8.847054826464988e-06, "loss": 0.3173, "step": 2507 }, { "epoch": 0.24408759124087592, "grad_norm": 1.5033745953013864, "learning_rate": 8.846047859750086e-06, "loss": 0.4813, "step": 2508 }, { "epoch": 0.24418491484184915, "grad_norm": 1.1189501535394493, "learning_rate": 8.845040510851044e-06, "loss": 0.3359, "step": 2509 }, { "epoch": 0.24428223844282237, "grad_norm": 1.4743455663494507, "learning_rate": 8.844032779867966e-06, "loss": 0.5354, "step": 2510 }, { "epoch": 0.24437956204379563, "grad_norm": 1.2644405709657818, "learning_rate": 8.843024666900983e-06, "loss": 0.4019, "step": 2511 }, { "epoch": 0.24447688564476885, "grad_norm": 1.5585250648144962, "learning_rate": 8.84201617205028e-06, "loss": 0.4977, "step": 2512 }, { "epoch": 0.24457420924574208, "grad_norm": 1.5187811483320863, "learning_rate": 8.841007295416069e-06, "loss": 0.6282, "step": 2513 }, { "epoch": 0.24467153284671533, "grad_norm": 1.461783750506842, "learning_rate": 8.839998037098601e-06, "loss": 0.6085, "step": 2514 }, { "epoch": 0.24476885644768856, "grad_norm": 1.4235036556142022, "learning_rate": 8.838988397198167e-06, "loss": 0.5696, "step": 2515 }, { "epoch": 0.2448661800486618, "grad_norm": 1.6731038078758624, "learning_rate": 8.837978375815097e-06, "loss": 0.5026, "step": 2516 }, { "epoch": 0.24496350364963504, "grad_norm": 1.2803102163564937, "learning_rate": 8.836967973049757e-06, "loss": 0.2605, "step": 2517 }, { "epoch": 0.24506082725060827, "grad_norm": 1.2869808613318177, "learning_rate": 8.835957189002551e-06, "loss": 0.3073, "step": 2518 }, { "epoch": 0.24515815085158152, "grad_norm": 1.4129342483481067, "learning_rate": 8.834946023773921e-06, "loss": 0.334, "step": 2519 }, { "epoch": 0.24525547445255474, "grad_norm": 1.6342111830003216, "learning_rate": 8.833934477464348e-06, "loss": 0.6127, "step": 2520 }, { "epoch": 0.24535279805352797, "grad_norm": 1.6465764681762454, "learning_rate": 8.83292255017435e-06, "loss": 0.6432, "step": 2521 }, { "epoch": 0.24545012165450122, "grad_norm": 1.4262158711234114, "learning_rate": 8.83191024200448e-06, "loss": 0.5224, "step": 2522 }, { "epoch": 0.24554744525547445, "grad_norm": 1.593193256147642, "learning_rate": 8.830897553055337e-06, "loss": 0.5211, "step": 2523 }, { "epoch": 0.24564476885644768, "grad_norm": 1.624031218270973, "learning_rate": 8.829884483427547e-06, "loss": 0.5128, "step": 2524 }, { "epoch": 0.24574209245742093, "grad_norm": 1.991662408778961, "learning_rate": 8.828871033221783e-06, "loss": 0.3025, "step": 2525 }, { "epoch": 0.24583941605839416, "grad_norm": 1.4390691402915812, "learning_rate": 8.82785720253875e-06, "loss": 0.5088, "step": 2526 }, { "epoch": 0.2459367396593674, "grad_norm": 1.4179406701872763, "learning_rate": 8.826842991479197e-06, "loss": 0.3887, "step": 2527 }, { "epoch": 0.24603406326034064, "grad_norm": 1.460230365502962, "learning_rate": 8.825828400143902e-06, "loss": 0.3316, "step": 2528 }, { "epoch": 0.24613138686131386, "grad_norm": 1.4924241123043909, "learning_rate": 8.824813428633685e-06, "loss": 0.4989, "step": 2529 }, { "epoch": 0.24622871046228711, "grad_norm": 1.593556186634644, "learning_rate": 8.82379807704941e-06, "loss": 0.525, "step": 2530 }, { "epoch": 0.24632603406326034, "grad_norm": 1.6809566227650843, "learning_rate": 8.822782345491968e-06, "loss": 0.3421, "step": 2531 }, { "epoch": 0.24642335766423357, "grad_norm": 1.4773288736144092, "learning_rate": 8.821766234062294e-06, "loss": 0.534, "step": 2532 }, { "epoch": 0.24652068126520682, "grad_norm": 1.4001059355846526, "learning_rate": 8.820749742861363e-06, "loss": 0.3887, "step": 2533 }, { "epoch": 0.24661800486618005, "grad_norm": 1.349012582441713, "learning_rate": 8.81973287199018e-06, "loss": 0.2852, "step": 2534 }, { "epoch": 0.24671532846715327, "grad_norm": 1.651550318908522, "learning_rate": 8.818715621549794e-06, "loss": 0.4967, "step": 2535 }, { "epoch": 0.24681265206812653, "grad_norm": 1.5932669562049986, "learning_rate": 8.817697991641289e-06, "loss": 0.4173, "step": 2536 }, { "epoch": 0.24690997566909975, "grad_norm": 1.3550488264007063, "learning_rate": 8.816679982365787e-06, "loss": 0.3404, "step": 2537 }, { "epoch": 0.247007299270073, "grad_norm": 1.571341106532058, "learning_rate": 8.815661593824451e-06, "loss": 0.5666, "step": 2538 }, { "epoch": 0.24710462287104623, "grad_norm": 1.5685299297246114, "learning_rate": 8.814642826118477e-06, "loss": 0.4521, "step": 2539 }, { "epoch": 0.24720194647201946, "grad_norm": 1.5355691524375334, "learning_rate": 8.8136236793491e-06, "loss": 0.3452, "step": 2540 }, { "epoch": 0.2472992700729927, "grad_norm": 1.4490992247448509, "learning_rate": 8.812604153617594e-06, "loss": 0.3046, "step": 2541 }, { "epoch": 0.24739659367396594, "grad_norm": 1.5790493967738255, "learning_rate": 8.81158424902527e-06, "loss": 0.5957, "step": 2542 }, { "epoch": 0.24749391727493916, "grad_norm": 1.8299083651337236, "learning_rate": 8.810563965673478e-06, "loss": 0.529, "step": 2543 }, { "epoch": 0.24759124087591242, "grad_norm": 1.336357630649535, "learning_rate": 8.8095433036636e-06, "loss": 0.2498, "step": 2544 }, { "epoch": 0.24768856447688564, "grad_norm": 3.272954864246679, "learning_rate": 8.808522263097065e-06, "loss": 0.3439, "step": 2545 }, { "epoch": 0.24778588807785887, "grad_norm": 1.5948700054852, "learning_rate": 8.80750084407533e-06, "loss": 0.5754, "step": 2546 }, { "epoch": 0.24788321167883212, "grad_norm": 1.2457293034288246, "learning_rate": 8.806479046699896e-06, "loss": 0.3355, "step": 2547 }, { "epoch": 0.24798053527980535, "grad_norm": 1.4118835775208534, "learning_rate": 8.8054568710723e-06, "loss": 0.4843, "step": 2548 }, { "epoch": 0.2480778588807786, "grad_norm": 2.0167817337794745, "learning_rate": 8.804434317294115e-06, "loss": 0.4781, "step": 2549 }, { "epoch": 0.24817518248175183, "grad_norm": 1.630746510877536, "learning_rate": 8.803411385466954e-06, "loss": 0.5226, "step": 2550 }, { "epoch": 0.24827250608272505, "grad_norm": 1.0942598516950242, "learning_rate": 8.802388075692465e-06, "loss": 0.1843, "step": 2551 }, { "epoch": 0.2483698296836983, "grad_norm": 1.8060042956650721, "learning_rate": 8.801364388072336e-06, "loss": 0.705, "step": 2552 }, { "epoch": 0.24846715328467153, "grad_norm": 1.632331667833736, "learning_rate": 8.800340322708291e-06, "loss": 0.4964, "step": 2553 }, { "epoch": 0.24856447688564476, "grad_norm": 1.539098206701319, "learning_rate": 8.799315879702095e-06, "loss": 0.3962, "step": 2554 }, { "epoch": 0.248661800486618, "grad_norm": 1.2219114137184675, "learning_rate": 8.798291059155543e-06, "loss": 0.2497, "step": 2555 }, { "epoch": 0.24875912408759124, "grad_norm": 1.4540964796439875, "learning_rate": 8.797265861170471e-06, "loss": 0.5159, "step": 2556 }, { "epoch": 0.24885644768856446, "grad_norm": 1.554150512584087, "learning_rate": 8.796240285848761e-06, "loss": 0.4412, "step": 2557 }, { "epoch": 0.24895377128953772, "grad_norm": 1.7004545782091594, "learning_rate": 8.795214333292318e-06, "loss": 0.5179, "step": 2558 }, { "epoch": 0.24905109489051094, "grad_norm": 1.726524110945535, "learning_rate": 8.794188003603095e-06, "loss": 0.4071, "step": 2559 }, { "epoch": 0.2491484184914842, "grad_norm": 1.27126477948415, "learning_rate": 8.793161296883077e-06, "loss": 0.2268, "step": 2560 }, { "epoch": 0.24924574209245742, "grad_norm": 1.9752049062158858, "learning_rate": 8.79213421323429e-06, "loss": 0.3632, "step": 2561 }, { "epoch": 0.24934306569343065, "grad_norm": 1.0556825817929254, "learning_rate": 8.791106752758796e-06, "loss": 0.3627, "step": 2562 }, { "epoch": 0.2494403892944039, "grad_norm": 1.6452772754401714, "learning_rate": 8.790078915558693e-06, "loss": 0.6043, "step": 2563 }, { "epoch": 0.24953771289537713, "grad_norm": 1.278547180886592, "learning_rate": 8.789050701736117e-06, "loss": 0.3768, "step": 2564 }, { "epoch": 0.24963503649635035, "grad_norm": 1.3443028399521961, "learning_rate": 8.788022111393247e-06, "loss": 0.3856, "step": 2565 }, { "epoch": 0.2497323600973236, "grad_norm": 1.2774166354695482, "learning_rate": 8.78699314463229e-06, "loss": 0.4391, "step": 2566 }, { "epoch": 0.24982968369829683, "grad_norm": 1.2231715277397497, "learning_rate": 8.785963801555497e-06, "loss": 0.4128, "step": 2567 }, { "epoch": 0.24992700729927006, "grad_norm": 1.4012153782510572, "learning_rate": 8.784934082265154e-06, "loss": 0.4683, "step": 2568 }, { "epoch": 0.2500243309002433, "grad_norm": 1.1954060436870173, "learning_rate": 8.783903986863583e-06, "loss": 0.2786, "step": 2569 }, { "epoch": 0.25012165450121654, "grad_norm": 1.7116998515615807, "learning_rate": 8.782873515453148e-06, "loss": 0.6004, "step": 2570 }, { "epoch": 0.2502189781021898, "grad_norm": 1.5712719922889962, "learning_rate": 8.781842668136247e-06, "loss": 0.6172, "step": 2571 }, { "epoch": 0.250316301703163, "grad_norm": 1.246915874910697, "learning_rate": 8.780811445015316e-06, "loss": 0.4335, "step": 2572 }, { "epoch": 0.25041362530413624, "grad_norm": 1.341456518636559, "learning_rate": 8.779779846192827e-06, "loss": 0.4187, "step": 2573 }, { "epoch": 0.2505109489051095, "grad_norm": 1.1323562755710477, "learning_rate": 8.778747871771293e-06, "loss": 0.2832, "step": 2574 }, { "epoch": 0.25060827250608275, "grad_norm": 1.4401083791532063, "learning_rate": 8.777715521853258e-06, "loss": 0.3779, "step": 2575 }, { "epoch": 0.25070559610705595, "grad_norm": 1.4784987737181619, "learning_rate": 8.77668279654131e-06, "loss": 0.3129, "step": 2576 }, { "epoch": 0.2508029197080292, "grad_norm": 1.1394717513462493, "learning_rate": 8.775649695938074e-06, "loss": 0.3162, "step": 2577 }, { "epoch": 0.25090024330900246, "grad_norm": 1.4625556674372375, "learning_rate": 8.774616220146204e-06, "loss": 0.4605, "step": 2578 }, { "epoch": 0.25099756690997566, "grad_norm": 3.1521808341091875, "learning_rate": 8.773582369268402e-06, "loss": 0.3485, "step": 2579 }, { "epoch": 0.2510948905109489, "grad_norm": 1.3578124438111323, "learning_rate": 8.7725481434074e-06, "loss": 0.4693, "step": 2580 }, { "epoch": 0.25119221411192216, "grad_norm": 1.63411664215404, "learning_rate": 8.771513542665969e-06, "loss": 0.4956, "step": 2581 }, { "epoch": 0.25128953771289536, "grad_norm": 1.5098765580454843, "learning_rate": 8.77047856714692e-06, "loss": 0.4657, "step": 2582 }, { "epoch": 0.2513868613138686, "grad_norm": 1.2801786921613054, "learning_rate": 8.7694432169531e-06, "loss": 0.3369, "step": 2583 }, { "epoch": 0.25148418491484187, "grad_norm": 1.4360422953324754, "learning_rate": 8.768407492187388e-06, "loss": 0.4907, "step": 2584 }, { "epoch": 0.25158150851581507, "grad_norm": 1.4560406874169747, "learning_rate": 8.767371392952708e-06, "loss": 0.3157, "step": 2585 }, { "epoch": 0.2516788321167883, "grad_norm": 1.934211832538441, "learning_rate": 8.766334919352018e-06, "loss": 0.7151, "step": 2586 }, { "epoch": 0.2517761557177616, "grad_norm": 1.6767044903158872, "learning_rate": 8.76529807148831e-06, "loss": 0.331, "step": 2587 }, { "epoch": 0.25187347931873477, "grad_norm": 1.4698852047894042, "learning_rate": 8.76426084946462e-06, "loss": 0.3951, "step": 2588 }, { "epoch": 0.251970802919708, "grad_norm": 1.3539539414605721, "learning_rate": 8.763223253384015e-06, "loss": 0.4011, "step": 2589 }, { "epoch": 0.2520681265206813, "grad_norm": 1.506242240790805, "learning_rate": 8.762185283349603e-06, "loss": 0.5274, "step": 2590 }, { "epoch": 0.2521654501216545, "grad_norm": 1.3140936667503142, "learning_rate": 8.761146939464527e-06, "loss": 0.3198, "step": 2591 }, { "epoch": 0.25226277372262773, "grad_norm": 1.1404767919952752, "learning_rate": 8.760108221831967e-06, "loss": 0.4013, "step": 2592 }, { "epoch": 0.252360097323601, "grad_norm": 1.4693477307137552, "learning_rate": 8.759069130555142e-06, "loss": 0.4783, "step": 2593 }, { "epoch": 0.2524574209245742, "grad_norm": 1.3352582665983712, "learning_rate": 8.75802966573731e-06, "loss": 0.4617, "step": 2594 }, { "epoch": 0.25255474452554744, "grad_norm": 1.2824428866870197, "learning_rate": 8.756989827481756e-06, "loss": 0.3352, "step": 2595 }, { "epoch": 0.2526520681265207, "grad_norm": 1.4774059328965283, "learning_rate": 8.755949615891814e-06, "loss": 0.4635, "step": 2596 }, { "epoch": 0.25274939172749394, "grad_norm": 1.6875827910282526, "learning_rate": 8.754909031070852e-06, "loss": 0.6222, "step": 2597 }, { "epoch": 0.25284671532846714, "grad_norm": 1.2063205441417741, "learning_rate": 8.75386807312227e-06, "loss": 0.2455, "step": 2598 }, { "epoch": 0.2529440389294404, "grad_norm": 1.3021547323360578, "learning_rate": 8.752826742149512e-06, "loss": 0.4329, "step": 2599 }, { "epoch": 0.25304136253041365, "grad_norm": 1.1835878076183852, "learning_rate": 8.751785038256054e-06, "loss": 0.3662, "step": 2600 }, { "epoch": 0.25313868613138685, "grad_norm": 1.544717999496196, "learning_rate": 8.750742961545409e-06, "loss": 0.3971, "step": 2601 }, { "epoch": 0.2532360097323601, "grad_norm": 1.3629505007649398, "learning_rate": 8.749700512121131e-06, "loss": 0.5107, "step": 2602 }, { "epoch": 0.25333333333333335, "grad_norm": 1.5737001686599814, "learning_rate": 8.74865769008681e-06, "loss": 0.5279, "step": 2603 }, { "epoch": 0.25343065693430655, "grad_norm": 1.4784815997261378, "learning_rate": 8.747614495546069e-06, "loss": 0.4792, "step": 2604 }, { "epoch": 0.2535279805352798, "grad_norm": 1.3236076722973804, "learning_rate": 8.74657092860257e-06, "loss": 0.3975, "step": 2605 }, { "epoch": 0.25362530413625306, "grad_norm": 1.0968595172191475, "learning_rate": 8.745526989360018e-06, "loss": 0.269, "step": 2606 }, { "epoch": 0.25372262773722626, "grad_norm": 1.562505340567045, "learning_rate": 8.744482677922147e-06, "loss": 0.5157, "step": 2607 }, { "epoch": 0.2538199513381995, "grad_norm": 1.656826278908397, "learning_rate": 8.743437994392729e-06, "loss": 0.4867, "step": 2608 }, { "epoch": 0.25391727493917277, "grad_norm": 1.3948672448161548, "learning_rate": 8.742392938875577e-06, "loss": 0.5279, "step": 2609 }, { "epoch": 0.25401459854014596, "grad_norm": 1.5892338813179163, "learning_rate": 8.741347511474539e-06, "loss": 0.5611, "step": 2610 }, { "epoch": 0.2541119221411192, "grad_norm": 1.6074249897923194, "learning_rate": 8.740301712293498e-06, "loss": 0.351, "step": 2611 }, { "epoch": 0.25420924574209247, "grad_norm": 1.6505540033315536, "learning_rate": 8.739255541436379e-06, "loss": 0.5747, "step": 2612 }, { "epoch": 0.25430656934306567, "grad_norm": 1.3314247428577628, "learning_rate": 8.738208999007137e-06, "loss": 0.3779, "step": 2613 }, { "epoch": 0.2544038929440389, "grad_norm": 1.2796270745139389, "learning_rate": 8.737162085109768e-06, "loss": 0.3557, "step": 2614 }, { "epoch": 0.2545012165450122, "grad_norm": 1.602637102567401, "learning_rate": 8.736114799848307e-06, "loss": 0.2882, "step": 2615 }, { "epoch": 0.2545985401459854, "grad_norm": 1.4207119219562419, "learning_rate": 8.735067143326821e-06, "loss": 0.3881, "step": 2616 }, { "epoch": 0.25469586374695863, "grad_norm": 1.4305110706379638, "learning_rate": 8.73401911564942e-06, "loss": 0.3486, "step": 2617 }, { "epoch": 0.2547931873479319, "grad_norm": 1.434428707272536, "learning_rate": 8.732970716920242e-06, "loss": 0.3169, "step": 2618 }, { "epoch": 0.25489051094890514, "grad_norm": 1.3228470441064362, "learning_rate": 8.73192194724347e-06, "loss": 0.4485, "step": 2619 }, { "epoch": 0.25498783454987833, "grad_norm": 1.3897030806906485, "learning_rate": 8.730872806723318e-06, "loss": 0.4172, "step": 2620 }, { "epoch": 0.2550851581508516, "grad_norm": 1.3840681937318722, "learning_rate": 8.729823295464045e-06, "loss": 0.251, "step": 2621 }, { "epoch": 0.25518248175182484, "grad_norm": 1.775278354364079, "learning_rate": 8.728773413569938e-06, "loss": 0.4811, "step": 2622 }, { "epoch": 0.25527980535279804, "grad_norm": 1.2701408917829737, "learning_rate": 8.727723161145325e-06, "loss": 0.2827, "step": 2623 }, { "epoch": 0.2553771289537713, "grad_norm": 1.5528362504659363, "learning_rate": 8.72667253829457e-06, "loss": 0.5084, "step": 2624 }, { "epoch": 0.25547445255474455, "grad_norm": 1.3793988523162408, "learning_rate": 8.725621545122074e-06, "loss": 0.3979, "step": 2625 }, { "epoch": 0.25557177615571774, "grad_norm": 1.70282889775673, "learning_rate": 8.724570181732275e-06, "loss": 0.5983, "step": 2626 }, { "epoch": 0.255669099756691, "grad_norm": 1.28105292316495, "learning_rate": 8.723518448229649e-06, "loss": 0.4756, "step": 2627 }, { "epoch": 0.25576642335766425, "grad_norm": 1.3826686116158597, "learning_rate": 8.722466344718705e-06, "loss": 0.2978, "step": 2628 }, { "epoch": 0.25586374695863745, "grad_norm": 1.460242284502631, "learning_rate": 8.721413871303992e-06, "loss": 0.4036, "step": 2629 }, { "epoch": 0.2559610705596107, "grad_norm": 1.4181157816170762, "learning_rate": 8.720361028090095e-06, "loss": 0.4224, "step": 2630 }, { "epoch": 0.25605839416058396, "grad_norm": 1.7898330028782403, "learning_rate": 8.719307815181638e-06, "loss": 0.7314, "step": 2631 }, { "epoch": 0.25615571776155716, "grad_norm": 1.6886124652733636, "learning_rate": 8.718254232683276e-06, "loss": 0.3513, "step": 2632 }, { "epoch": 0.2562530413625304, "grad_norm": 1.2562027575971086, "learning_rate": 8.717200280699705e-06, "loss": 0.284, "step": 2633 }, { "epoch": 0.25635036496350366, "grad_norm": 1.4899596514775177, "learning_rate": 8.716145959335658e-06, "loss": 0.2778, "step": 2634 }, { "epoch": 0.25644768856447686, "grad_norm": 1.1699021581347986, "learning_rate": 8.715091268695903e-06, "loss": 0.3163, "step": 2635 }, { "epoch": 0.2565450121654501, "grad_norm": 1.020653527182934, "learning_rate": 8.714036208885243e-06, "loss": 0.2191, "step": 2636 }, { "epoch": 0.25664233576642337, "grad_norm": 1.5373942827305265, "learning_rate": 8.712980780008526e-06, "loss": 0.4183, "step": 2637 }, { "epoch": 0.25673965936739657, "grad_norm": 1.1268355971062876, "learning_rate": 8.711924982170623e-06, "loss": 0.2851, "step": 2638 }, { "epoch": 0.2568369829683698, "grad_norm": 1.25228244300652, "learning_rate": 8.710868815476456e-06, "loss": 0.1963, "step": 2639 }, { "epoch": 0.2569343065693431, "grad_norm": 1.3905442460862172, "learning_rate": 8.709812280030971e-06, "loss": 0.3648, "step": 2640 }, { "epoch": 0.2570316301703163, "grad_norm": 1.5078176389616522, "learning_rate": 8.708755375939162e-06, "loss": 0.4131, "step": 2641 }, { "epoch": 0.2571289537712895, "grad_norm": 1.4441200079463874, "learning_rate": 8.70769810330605e-06, "loss": 0.4047, "step": 2642 }, { "epoch": 0.2572262773722628, "grad_norm": 1.3883503516178042, "learning_rate": 8.7066404622367e-06, "loss": 0.3308, "step": 2643 }, { "epoch": 0.25732360097323603, "grad_norm": 1.7851696055640995, "learning_rate": 8.705582452836208e-06, "loss": 0.336, "step": 2644 }, { "epoch": 0.25742092457420923, "grad_norm": 1.309628752016819, "learning_rate": 8.70452407520971e-06, "loss": 0.3462, "step": 2645 }, { "epoch": 0.2575182481751825, "grad_norm": 1.3618437175125289, "learning_rate": 8.703465329462379e-06, "loss": 0.3047, "step": 2646 }, { "epoch": 0.25761557177615574, "grad_norm": 1.5821297320572192, "learning_rate": 8.702406215699421e-06, "loss": 0.2318, "step": 2647 }, { "epoch": 0.25771289537712894, "grad_norm": 1.4729014225467234, "learning_rate": 8.701346734026082e-06, "loss": 0.3147, "step": 2648 }, { "epoch": 0.2578102189781022, "grad_norm": 1.6287249640343295, "learning_rate": 8.700286884547642e-06, "loss": 0.5808, "step": 2649 }, { "epoch": 0.25790754257907544, "grad_norm": 1.2824109098190504, "learning_rate": 8.69922666736942e-06, "loss": 0.3836, "step": 2650 }, { "epoch": 0.25800486618004864, "grad_norm": 1.5096397594183033, "learning_rate": 8.69816608259677e-06, "loss": 0.3804, "step": 2651 }, { "epoch": 0.2581021897810219, "grad_norm": 1.7247008216261863, "learning_rate": 8.697105130335084e-06, "loss": 0.3378, "step": 2652 }, { "epoch": 0.25819951338199515, "grad_norm": 1.5872130127065738, "learning_rate": 8.69604381068979e-06, "loss": 0.4369, "step": 2653 }, { "epoch": 0.25829683698296835, "grad_norm": 1.5909295650502344, "learning_rate": 8.694982123766348e-06, "loss": 0.3554, "step": 2654 }, { "epoch": 0.2583941605839416, "grad_norm": 1.7135035115393307, "learning_rate": 8.693920069670265e-06, "loss": 0.4869, "step": 2655 }, { "epoch": 0.25849148418491485, "grad_norm": 1.3366492087792976, "learning_rate": 8.692857648507071e-06, "loss": 0.3102, "step": 2656 }, { "epoch": 0.25858880778588805, "grad_norm": 1.2478048122674565, "learning_rate": 8.691794860382345e-06, "loss": 0.3722, "step": 2657 }, { "epoch": 0.2586861313868613, "grad_norm": 1.5080776475601503, "learning_rate": 8.690731705401694e-06, "loss": 0.316, "step": 2658 }, { "epoch": 0.25878345498783456, "grad_norm": 1.443811575497146, "learning_rate": 8.689668183670763e-06, "loss": 0.2875, "step": 2659 }, { "epoch": 0.25888077858880776, "grad_norm": 1.7036441396737687, "learning_rate": 8.688604295295238e-06, "loss": 0.4025, "step": 2660 }, { "epoch": 0.258978102189781, "grad_norm": 1.4234806259439374, "learning_rate": 8.687540040380838e-06, "loss": 0.4452, "step": 2661 }, { "epoch": 0.25907542579075427, "grad_norm": 1.2741393980838642, "learning_rate": 8.686475419033315e-06, "loss": 0.2237, "step": 2662 }, { "epoch": 0.2591727493917275, "grad_norm": 1.1826384563722763, "learning_rate": 8.685410431358464e-06, "loss": 0.3398, "step": 2663 }, { "epoch": 0.2592700729927007, "grad_norm": 1.5757741509023746, "learning_rate": 8.684345077462117e-06, "loss": 0.3846, "step": 2664 }, { "epoch": 0.25936739659367397, "grad_norm": 1.475707275733763, "learning_rate": 8.683279357450131e-06, "loss": 0.2804, "step": 2665 }, { "epoch": 0.2594647201946472, "grad_norm": 1.4241797244636094, "learning_rate": 8.682213271428415e-06, "loss": 0.2553, "step": 2666 }, { "epoch": 0.2595620437956204, "grad_norm": 1.1548194283365685, "learning_rate": 8.6811468195029e-06, "loss": 0.3118, "step": 2667 }, { "epoch": 0.2596593673965937, "grad_norm": 1.5918458521510486, "learning_rate": 8.680080001779564e-06, "loss": 0.4525, "step": 2668 }, { "epoch": 0.25975669099756693, "grad_norm": 1.5508802560099362, "learning_rate": 8.679012818364416e-06, "loss": 0.4163, "step": 2669 }, { "epoch": 0.25985401459854013, "grad_norm": 2.4434630008376232, "learning_rate": 8.677945269363504e-06, "loss": 0.4372, "step": 2670 }, { "epoch": 0.2599513381995134, "grad_norm": 1.5324792404386718, "learning_rate": 8.676877354882907e-06, "loss": 0.3514, "step": 2671 }, { "epoch": 0.26004866180048664, "grad_norm": 2.0012246197360493, "learning_rate": 8.67580907502875e-06, "loss": 0.5067, "step": 2672 }, { "epoch": 0.26014598540145983, "grad_norm": 1.5232176793280576, "learning_rate": 8.674740429907186e-06, "loss": 0.4174, "step": 2673 }, { "epoch": 0.2602433090024331, "grad_norm": 1.3322865976928646, "learning_rate": 8.673671419624405e-06, "loss": 0.4095, "step": 2674 }, { "epoch": 0.26034063260340634, "grad_norm": 1.514406481268828, "learning_rate": 8.672602044286638e-06, "loss": 0.5915, "step": 2675 }, { "epoch": 0.26043795620437954, "grad_norm": 1.528467413797325, "learning_rate": 8.67153230400015e-06, "loss": 0.4018, "step": 2676 }, { "epoch": 0.2605352798053528, "grad_norm": 1.4367698805538582, "learning_rate": 8.670462198871237e-06, "loss": 0.4115, "step": 2677 }, { "epoch": 0.26063260340632605, "grad_norm": 1.6984444092554742, "learning_rate": 8.66939172900624e-06, "loss": 0.59, "step": 2678 }, { "epoch": 0.26072992700729924, "grad_norm": 1.4698751482200727, "learning_rate": 8.668320894511534e-06, "loss": 0.4144, "step": 2679 }, { "epoch": 0.2608272506082725, "grad_norm": 1.5003641004534345, "learning_rate": 8.667249695493525e-06, "loss": 0.4294, "step": 2680 }, { "epoch": 0.26092457420924575, "grad_norm": 1.3123452231563197, "learning_rate": 8.666178132058659e-06, "loss": 0.3408, "step": 2681 }, { "epoch": 0.261021897810219, "grad_norm": 1.5184535738040659, "learning_rate": 8.665106204313418e-06, "loss": 0.3662, "step": 2682 }, { "epoch": 0.2611192214111922, "grad_norm": 1.0623024588559944, "learning_rate": 8.664033912364321e-06, "loss": 0.2953, "step": 2683 }, { "epoch": 0.26121654501216546, "grad_norm": 1.4112725317400583, "learning_rate": 8.662961256317923e-06, "loss": 0.3825, "step": 2684 }, { "epoch": 0.2613138686131387, "grad_norm": 2.2729536767377065, "learning_rate": 8.661888236280813e-06, "loss": 0.5791, "step": 2685 }, { "epoch": 0.2614111922141119, "grad_norm": 2.2747614305768504, "learning_rate": 8.660814852359617e-06, "loss": 0.4859, "step": 2686 }, { "epoch": 0.26150851581508516, "grad_norm": 1.6069562939941755, "learning_rate": 8.659741104661002e-06, "loss": 0.5254, "step": 2687 }, { "epoch": 0.2616058394160584, "grad_norm": 1.3624858995460438, "learning_rate": 8.658666993291662e-06, "loss": 0.3904, "step": 2688 }, { "epoch": 0.2617031630170316, "grad_norm": 1.2954398797770197, "learning_rate": 8.657592518358332e-06, "loss": 0.3789, "step": 2689 }, { "epoch": 0.26180048661800487, "grad_norm": 1.4158991903907718, "learning_rate": 8.656517679967788e-06, "loss": 0.3732, "step": 2690 }, { "epoch": 0.2618978102189781, "grad_norm": 1.3754641009755615, "learning_rate": 8.655442478226835e-06, "loss": 0.3035, "step": 2691 }, { "epoch": 0.2619951338199513, "grad_norm": 1.3522608722257456, "learning_rate": 8.654366913242316e-06, "loss": 0.347, "step": 2692 }, { "epoch": 0.2620924574209246, "grad_norm": 1.2764013704656585, "learning_rate": 8.65329098512111e-06, "loss": 0.4207, "step": 2693 }, { "epoch": 0.2621897810218978, "grad_norm": 1.4009476621873176, "learning_rate": 8.652214693970133e-06, "loss": 0.4628, "step": 2694 }, { "epoch": 0.262287104622871, "grad_norm": 1.3860597575903169, "learning_rate": 8.65113803989634e-06, "loss": 0.3844, "step": 2695 }, { "epoch": 0.2623844282238443, "grad_norm": 1.5636622874346966, "learning_rate": 8.650061023006711e-06, "loss": 0.6239, "step": 2696 }, { "epoch": 0.26248175182481753, "grad_norm": 1.3677003606993399, "learning_rate": 8.648983643408276e-06, "loss": 0.4319, "step": 2697 }, { "epoch": 0.26257907542579073, "grad_norm": 1.4720449620822884, "learning_rate": 8.647905901208096e-06, "loss": 0.4824, "step": 2698 }, { "epoch": 0.262676399026764, "grad_norm": 1.4180687903221385, "learning_rate": 8.646827796513262e-06, "loss": 0.539, "step": 2699 }, { "epoch": 0.26277372262773724, "grad_norm": 1.3679667840460958, "learning_rate": 8.64574932943091e-06, "loss": 0.4588, "step": 2700 }, { "epoch": 0.26287104622871044, "grad_norm": 1.125542933529368, "learning_rate": 8.644670500068205e-06, "loss": 0.3441, "step": 2701 }, { "epoch": 0.2629683698296837, "grad_norm": 1.5641789380613262, "learning_rate": 8.643591308532353e-06, "loss": 0.4998, "step": 2702 }, { "epoch": 0.26306569343065694, "grad_norm": 1.3425870342919086, "learning_rate": 8.642511754930592e-06, "loss": 0.4678, "step": 2703 }, { "epoch": 0.2631630170316302, "grad_norm": 1.3010588112101855, "learning_rate": 8.641431839370199e-06, "loss": 0.4005, "step": 2704 }, { "epoch": 0.2632603406326034, "grad_norm": 1.0067860306988832, "learning_rate": 8.640351561958487e-06, "loss": 0.2243, "step": 2705 }, { "epoch": 0.26335766423357665, "grad_norm": 1.4713856201410829, "learning_rate": 8.639270922802802e-06, "loss": 0.4325, "step": 2706 }, { "epoch": 0.2634549878345499, "grad_norm": 1.55962351192921, "learning_rate": 8.63818992201053e-06, "loss": 0.5307, "step": 2707 }, { "epoch": 0.2635523114355231, "grad_norm": 1.4073629002175063, "learning_rate": 8.637108559689088e-06, "loss": 0.3329, "step": 2708 }, { "epoch": 0.26364963503649635, "grad_norm": 1.2827086170801953, "learning_rate": 8.636026835945933e-06, "loss": 0.3095, "step": 2709 }, { "epoch": 0.2637469586374696, "grad_norm": 1.4100209855486194, "learning_rate": 8.634944750888556e-06, "loss": 0.3033, "step": 2710 }, { "epoch": 0.2638442822384428, "grad_norm": 1.343279822840104, "learning_rate": 8.633862304624484e-06, "loss": 0.402, "step": 2711 }, { "epoch": 0.26394160583941606, "grad_norm": 1.4374516520455163, "learning_rate": 8.632779497261284e-06, "loss": 0.4574, "step": 2712 }, { "epoch": 0.2640389294403893, "grad_norm": 1.1554648336740065, "learning_rate": 8.63169632890655e-06, "loss": 0.3091, "step": 2713 }, { "epoch": 0.2641362530413625, "grad_norm": 1.5304191047752203, "learning_rate": 8.630612799667923e-06, "loss": 0.5392, "step": 2714 }, { "epoch": 0.26423357664233577, "grad_norm": 1.9364214941018973, "learning_rate": 8.629528909653067e-06, "loss": 0.4705, "step": 2715 }, { "epoch": 0.264330900243309, "grad_norm": 1.5176007479008755, "learning_rate": 8.628444658969694e-06, "loss": 0.3969, "step": 2716 }, { "epoch": 0.2644282238442822, "grad_norm": 1.3882529784475808, "learning_rate": 8.627360047725543e-06, "loss": 0.4672, "step": 2717 }, { "epoch": 0.26452554744525547, "grad_norm": 1.0419873824719341, "learning_rate": 8.626275076028397e-06, "loss": 0.2247, "step": 2718 }, { "epoch": 0.2646228710462287, "grad_norm": 1.4147177174052021, "learning_rate": 8.625189743986068e-06, "loss": 0.3922, "step": 2719 }, { "epoch": 0.2647201946472019, "grad_norm": 1.3513629744004096, "learning_rate": 8.624104051706405e-06, "loss": 0.415, "step": 2720 }, { "epoch": 0.2648175182481752, "grad_norm": 1.3701041364422066, "learning_rate": 8.623017999297294e-06, "loss": 0.4329, "step": 2721 }, { "epoch": 0.26491484184914843, "grad_norm": 1.5102917148163044, "learning_rate": 8.621931586866658e-06, "loss": 0.4104, "step": 2722 }, { "epoch": 0.26501216545012163, "grad_norm": 1.4836677874290423, "learning_rate": 8.620844814522455e-06, "loss": 0.5131, "step": 2723 }, { "epoch": 0.2651094890510949, "grad_norm": 1.2607364196409017, "learning_rate": 8.619757682372675e-06, "loss": 0.3856, "step": 2724 }, { "epoch": 0.26520681265206814, "grad_norm": 1.4082529003642341, "learning_rate": 8.61867019052535e-06, "loss": 0.4719, "step": 2725 }, { "epoch": 0.2653041362530414, "grad_norm": 1.4276001080419702, "learning_rate": 8.617582339088545e-06, "loss": 0.2825, "step": 2726 }, { "epoch": 0.2654014598540146, "grad_norm": 1.4331001450603844, "learning_rate": 8.61649412817036e-06, "loss": 0.5104, "step": 2727 }, { "epoch": 0.26549878345498784, "grad_norm": 1.358868383954866, "learning_rate": 8.615405557878929e-06, "loss": 0.4359, "step": 2728 }, { "epoch": 0.2655961070559611, "grad_norm": 1.678463370024911, "learning_rate": 8.614316628322427e-06, "loss": 0.4658, "step": 2729 }, { "epoch": 0.2656934306569343, "grad_norm": 1.2268291596580612, "learning_rate": 8.61322733960906e-06, "loss": 0.2337, "step": 2730 }, { "epoch": 0.26579075425790755, "grad_norm": 0.9437944818586388, "learning_rate": 8.61213769184707e-06, "loss": 0.2525, "step": 2731 }, { "epoch": 0.2658880778588808, "grad_norm": 1.2480121542051432, "learning_rate": 8.611047685144737e-06, "loss": 0.2656, "step": 2732 }, { "epoch": 0.265985401459854, "grad_norm": 1.5255853623894704, "learning_rate": 8.609957319610377e-06, "loss": 0.5071, "step": 2733 }, { "epoch": 0.26608272506082725, "grad_norm": 1.5847632660353408, "learning_rate": 8.60886659535234e-06, "loss": 0.4018, "step": 2734 }, { "epoch": 0.2661800486618005, "grad_norm": 1.3469310633769445, "learning_rate": 8.60777551247901e-06, "loss": 0.451, "step": 2735 }, { "epoch": 0.2662773722627737, "grad_norm": 1.3995570810499534, "learning_rate": 8.60668407109881e-06, "loss": 0.4991, "step": 2736 }, { "epoch": 0.26637469586374696, "grad_norm": 1.5198269828404072, "learning_rate": 8.605592271320199e-06, "loss": 0.4266, "step": 2737 }, { "epoch": 0.2664720194647202, "grad_norm": 1.3040716122405567, "learning_rate": 8.604500113251666e-06, "loss": 0.3465, "step": 2738 }, { "epoch": 0.2665693430656934, "grad_norm": 1.3643506509353014, "learning_rate": 8.60340759700174e-06, "loss": 0.4355, "step": 2739 }, { "epoch": 0.26666666666666666, "grad_norm": 1.026074804296968, "learning_rate": 8.602314722678989e-06, "loss": 0.2507, "step": 2740 }, { "epoch": 0.2667639902676399, "grad_norm": 1.3894972782664292, "learning_rate": 8.601221490392009e-06, "loss": 0.3981, "step": 2741 }, { "epoch": 0.2668613138686131, "grad_norm": 1.3071238902768438, "learning_rate": 8.600127900249435e-06, "loss": 0.5138, "step": 2742 }, { "epoch": 0.26695863746958637, "grad_norm": 1.61583752885221, "learning_rate": 8.59903395235994e-06, "loss": 0.5072, "step": 2743 }, { "epoch": 0.2670559610705596, "grad_norm": 1.3679578518174673, "learning_rate": 8.597939646832227e-06, "loss": 0.3754, "step": 2744 }, { "epoch": 0.2671532846715328, "grad_norm": 1.0943121419181938, "learning_rate": 8.596844983775042e-06, "loss": 0.2457, "step": 2745 }, { "epoch": 0.2672506082725061, "grad_norm": 1.176479145152164, "learning_rate": 8.59574996329716e-06, "loss": 0.3687, "step": 2746 }, { "epoch": 0.2673479318734793, "grad_norm": 1.2666642902167933, "learning_rate": 8.594654585507393e-06, "loss": 0.2664, "step": 2747 }, { "epoch": 0.2674452554744526, "grad_norm": 1.3951377938692817, "learning_rate": 8.59355885051459e-06, "loss": 0.4035, "step": 2748 }, { "epoch": 0.2675425790754258, "grad_norm": 1.2722832533001889, "learning_rate": 8.592462758427635e-06, "loss": 0.4643, "step": 2749 }, { "epoch": 0.26763990267639903, "grad_norm": 1.2157588835981379, "learning_rate": 8.59136630935545e-06, "loss": 0.3612, "step": 2750 }, { "epoch": 0.2677372262773723, "grad_norm": 1.0785566378114326, "learning_rate": 8.590269503406986e-06, "loss": 0.3403, "step": 2751 }, { "epoch": 0.2678345498783455, "grad_norm": 1.2447292785758555, "learning_rate": 8.589172340691235e-06, "loss": 0.3873, "step": 2752 }, { "epoch": 0.26793187347931874, "grad_norm": 1.166378916722292, "learning_rate": 8.588074821317222e-06, "loss": 0.3264, "step": 2753 }, { "epoch": 0.268029197080292, "grad_norm": 1.2197572995933224, "learning_rate": 8.586976945394008e-06, "loss": 0.3793, "step": 2754 }, { "epoch": 0.2681265206812652, "grad_norm": 1.6234832434134598, "learning_rate": 8.58587871303069e-06, "loss": 0.5521, "step": 2755 }, { "epoch": 0.26822384428223844, "grad_norm": 1.4760533014923396, "learning_rate": 8.584780124336403e-06, "loss": 0.5024, "step": 2756 }, { "epoch": 0.2683211678832117, "grad_norm": 1.4156240197993037, "learning_rate": 8.58368117942031e-06, "loss": 0.2848, "step": 2757 }, { "epoch": 0.2684184914841849, "grad_norm": 1.9092848960981135, "learning_rate": 8.582581878391614e-06, "loss": 0.4053, "step": 2758 }, { "epoch": 0.26851581508515815, "grad_norm": 1.2158050168465575, "learning_rate": 8.581482221359557e-06, "loss": 0.2709, "step": 2759 }, { "epoch": 0.2686131386861314, "grad_norm": 1.5515245630825936, "learning_rate": 8.580382208433408e-06, "loss": 0.4549, "step": 2760 }, { "epoch": 0.2687104622871046, "grad_norm": 1.6603384837941395, "learning_rate": 8.57928183972248e-06, "loss": 0.3316, "step": 2761 }, { "epoch": 0.26880778588807785, "grad_norm": 1.5595744401068579, "learning_rate": 8.578181115336114e-06, "loss": 0.5733, "step": 2762 }, { "epoch": 0.2689051094890511, "grad_norm": 1.3547786308004384, "learning_rate": 8.577080035383693e-06, "loss": 0.5295, "step": 2763 }, { "epoch": 0.2690024330900243, "grad_norm": 1.2889595684224195, "learning_rate": 8.57597859997463e-06, "loss": 0.3876, "step": 2764 }, { "epoch": 0.26909975669099756, "grad_norm": 1.5401948742368967, "learning_rate": 8.574876809218375e-06, "loss": 0.4847, "step": 2765 }, { "epoch": 0.2691970802919708, "grad_norm": 1.5886773556984544, "learning_rate": 8.573774663224414e-06, "loss": 0.4746, "step": 2766 }, { "epoch": 0.269294403892944, "grad_norm": 1.2747463684628804, "learning_rate": 8.572672162102269e-06, "loss": 0.2568, "step": 2767 }, { "epoch": 0.26939172749391727, "grad_norm": 1.1674673988315882, "learning_rate": 8.571569305961495e-06, "loss": 0.4329, "step": 2768 }, { "epoch": 0.2694890510948905, "grad_norm": 1.6882113617461265, "learning_rate": 8.570466094911684e-06, "loss": 0.6891, "step": 2769 }, { "epoch": 0.2695863746958638, "grad_norm": 1.6660737969996857, "learning_rate": 8.569362529062461e-06, "loss": 0.5887, "step": 2770 }, { "epoch": 0.26968369829683697, "grad_norm": 1.1653044559020052, "learning_rate": 8.568258608523491e-06, "loss": 0.2452, "step": 2771 }, { "epoch": 0.2697810218978102, "grad_norm": 1.5681206888540218, "learning_rate": 8.567154333404471e-06, "loss": 0.4952, "step": 2772 }, { "epoch": 0.2698783454987835, "grad_norm": 1.3994591247160806, "learning_rate": 8.56604970381513e-06, "loss": 0.2848, "step": 2773 }, { "epoch": 0.2699756690997567, "grad_norm": 1.300192393224716, "learning_rate": 8.564944719865238e-06, "loss": 0.3924, "step": 2774 }, { "epoch": 0.27007299270072993, "grad_norm": 1.4412015443912716, "learning_rate": 8.5638393816646e-06, "loss": 0.4531, "step": 2775 }, { "epoch": 0.2701703163017032, "grad_norm": 1.4360872043281558, "learning_rate": 8.56273368932305e-06, "loss": 0.4571, "step": 2776 }, { "epoch": 0.2702676399026764, "grad_norm": 1.5811581309774965, "learning_rate": 8.561627642950465e-06, "loss": 0.4638, "step": 2777 }, { "epoch": 0.27036496350364964, "grad_norm": 1.7924696283680308, "learning_rate": 8.560521242656751e-06, "loss": 0.2922, "step": 2778 }, { "epoch": 0.2704622871046229, "grad_norm": 1.7929283253885162, "learning_rate": 8.559414488551854e-06, "loss": 0.6197, "step": 2779 }, { "epoch": 0.2705596107055961, "grad_norm": 1.5593955671219286, "learning_rate": 8.558307380745751e-06, "loss": 0.5448, "step": 2780 }, { "epoch": 0.27065693430656934, "grad_norm": 1.3760682204767343, "learning_rate": 8.557199919348455e-06, "loss": 0.4434, "step": 2781 }, { "epoch": 0.2707542579075426, "grad_norm": 3.203989647256839, "learning_rate": 8.556092104470019e-06, "loss": 0.4323, "step": 2782 }, { "epoch": 0.2708515815085158, "grad_norm": 1.3460764595466628, "learning_rate": 8.554983936220525e-06, "loss": 0.3367, "step": 2783 }, { "epoch": 0.27094890510948905, "grad_norm": 1.6160732245190643, "learning_rate": 8.553875414710088e-06, "loss": 0.5301, "step": 2784 }, { "epoch": 0.2710462287104623, "grad_norm": 1.5749454761331767, "learning_rate": 8.552766540048872e-06, "loss": 0.3741, "step": 2785 }, { "epoch": 0.2711435523114355, "grad_norm": 1.150423059184381, "learning_rate": 8.551657312347057e-06, "loss": 0.2796, "step": 2786 }, { "epoch": 0.27124087591240875, "grad_norm": 1.4217054664233575, "learning_rate": 8.550547731714874e-06, "loss": 0.4543, "step": 2787 }, { "epoch": 0.271338199513382, "grad_norm": 1.470206005686861, "learning_rate": 8.54943779826258e-06, "loss": 0.438, "step": 2788 }, { "epoch": 0.2714355231143552, "grad_norm": 1.5766219733733982, "learning_rate": 8.54832751210047e-06, "loss": 0.4966, "step": 2789 }, { "epoch": 0.27153284671532846, "grad_norm": 1.2135102045567707, "learning_rate": 8.547216873338876e-06, "loss": 0.358, "step": 2790 }, { "epoch": 0.2716301703163017, "grad_norm": 1.4595225616938101, "learning_rate": 8.546105882088158e-06, "loss": 0.2225, "step": 2791 }, { "epoch": 0.27172749391727496, "grad_norm": 1.3363330099445299, "learning_rate": 8.54499453845872e-06, "loss": 0.3914, "step": 2792 }, { "epoch": 0.27182481751824816, "grad_norm": 1.3646141902938869, "learning_rate": 8.543882842560997e-06, "loss": 0.4558, "step": 2793 }, { "epoch": 0.2719221411192214, "grad_norm": 1.3464180828493995, "learning_rate": 8.542770794505456e-06, "loss": 0.4786, "step": 2794 }, { "epoch": 0.27201946472019467, "grad_norm": 1.044551377255888, "learning_rate": 8.541658394402606e-06, "loss": 0.303, "step": 2795 }, { "epoch": 0.27211678832116787, "grad_norm": 1.6706499263846184, "learning_rate": 8.540545642362982e-06, "loss": 0.4033, "step": 2796 }, { "epoch": 0.2722141119221411, "grad_norm": 1.3164784669169094, "learning_rate": 8.539432538497162e-06, "loss": 0.4343, "step": 2797 }, { "epoch": 0.2723114355231144, "grad_norm": 1.6044535524867656, "learning_rate": 8.538319082915757e-06, "loss": 0.3641, "step": 2798 }, { "epoch": 0.2724087591240876, "grad_norm": 1.9897822202433566, "learning_rate": 8.537205275729406e-06, "loss": 0.48, "step": 2799 }, { "epoch": 0.2725060827250608, "grad_norm": 1.4110579632506512, "learning_rate": 8.536091117048794e-06, "loss": 0.4798, "step": 2800 }, { "epoch": 0.2726034063260341, "grad_norm": 1.4415607317920478, "learning_rate": 8.534976606984636e-06, "loss": 0.343, "step": 2801 }, { "epoch": 0.2727007299270073, "grad_norm": 1.6363482727427716, "learning_rate": 8.53386174564768e-06, "loss": 0.6087, "step": 2802 }, { "epoch": 0.27279805352798053, "grad_norm": 1.1272383780084416, "learning_rate": 8.532746533148708e-06, "loss": 0.2444, "step": 2803 }, { "epoch": 0.2728953771289538, "grad_norm": 1.712140222332907, "learning_rate": 8.531630969598544e-06, "loss": 0.6702, "step": 2804 }, { "epoch": 0.272992700729927, "grad_norm": 1.474485197586056, "learning_rate": 8.530515055108038e-06, "loss": 0.3876, "step": 2805 }, { "epoch": 0.27309002433090024, "grad_norm": 1.2926370708159094, "learning_rate": 8.529398789788082e-06, "loss": 0.3239, "step": 2806 }, { "epoch": 0.2731873479318735, "grad_norm": 1.1171205940753008, "learning_rate": 8.528282173749599e-06, "loss": 0.3135, "step": 2807 }, { "epoch": 0.2732846715328467, "grad_norm": 1.3561762741371761, "learning_rate": 8.527165207103546e-06, "loss": 0.4686, "step": 2808 }, { "epoch": 0.27338199513381994, "grad_norm": 1.3082129080843141, "learning_rate": 8.52604788996092e-06, "loss": 0.4274, "step": 2809 }, { "epoch": 0.2734793187347932, "grad_norm": 1.2958697823961909, "learning_rate": 8.524930222432748e-06, "loss": 0.4334, "step": 2810 }, { "epoch": 0.2735766423357664, "grad_norm": 1.4541266485936315, "learning_rate": 8.523812204630093e-06, "loss": 0.5685, "step": 2811 }, { "epoch": 0.27367396593673965, "grad_norm": 1.3303596097899522, "learning_rate": 8.522693836664052e-06, "loss": 0.4305, "step": 2812 }, { "epoch": 0.2737712895377129, "grad_norm": 1.220005269273729, "learning_rate": 8.521575118645761e-06, "loss": 0.4281, "step": 2813 }, { "epoch": 0.27386861313868616, "grad_norm": 1.0981673276035366, "learning_rate": 8.520456050686384e-06, "loss": 0.3641, "step": 2814 }, { "epoch": 0.27396593673965935, "grad_norm": 1.4310281439998578, "learning_rate": 8.519336632897128e-06, "loss": 0.557, "step": 2815 }, { "epoch": 0.2740632603406326, "grad_norm": 1.345841620727785, "learning_rate": 8.518216865389227e-06, "loss": 0.3991, "step": 2816 }, { "epoch": 0.27416058394160586, "grad_norm": 1.6650753610183784, "learning_rate": 8.517096748273951e-06, "loss": 0.3624, "step": 2817 }, { "epoch": 0.27425790754257906, "grad_norm": 1.2633026385457689, "learning_rate": 8.515976281662613e-06, "loss": 0.349, "step": 2818 }, { "epoch": 0.2743552311435523, "grad_norm": 1.392024932172172, "learning_rate": 8.514855465666546e-06, "loss": 0.4514, "step": 2819 }, { "epoch": 0.27445255474452557, "grad_norm": 1.4295145565971665, "learning_rate": 8.513734300397135e-06, "loss": 0.5668, "step": 2820 }, { "epoch": 0.27454987834549877, "grad_norm": 1.0967459926110283, "learning_rate": 8.512612785965787e-06, "loss": 0.1808, "step": 2821 }, { "epoch": 0.274647201946472, "grad_norm": 1.4843839946273536, "learning_rate": 8.511490922483946e-06, "loss": 0.4352, "step": 2822 }, { "epoch": 0.2747445255474453, "grad_norm": 1.339649820333997, "learning_rate": 8.510368710063093e-06, "loss": 0.3137, "step": 2823 }, { "epoch": 0.27484184914841847, "grad_norm": 1.32567882782868, "learning_rate": 8.509246148814745e-06, "loss": 0.4089, "step": 2824 }, { "epoch": 0.2749391727493917, "grad_norm": 1.2497731956714773, "learning_rate": 8.50812323885045e-06, "loss": 0.382, "step": 2825 }, { "epoch": 0.275036496350365, "grad_norm": 1.5771259884963846, "learning_rate": 8.506999980281791e-06, "loss": 0.501, "step": 2826 }, { "epoch": 0.2751338199513382, "grad_norm": 1.3295615561309837, "learning_rate": 8.505876373220393e-06, "loss": 0.3635, "step": 2827 }, { "epoch": 0.27523114355231143, "grad_norm": 1.55543645713159, "learning_rate": 8.504752417777899e-06, "loss": 0.2986, "step": 2828 }, { "epoch": 0.2753284671532847, "grad_norm": 1.421283473121396, "learning_rate": 8.503628114066008e-06, "loss": 0.4931, "step": 2829 }, { "epoch": 0.2754257907542579, "grad_norm": 1.1988827610585986, "learning_rate": 8.502503462196435e-06, "loss": 0.3272, "step": 2830 }, { "epoch": 0.27552311435523114, "grad_norm": 1.6163491550131937, "learning_rate": 8.501378462280941e-06, "loss": 0.5794, "step": 2831 }, { "epoch": 0.2756204379562044, "grad_norm": 1.6499795796835799, "learning_rate": 8.500253114431316e-06, "loss": 0.3668, "step": 2832 }, { "epoch": 0.27571776155717764, "grad_norm": 1.7305434923413188, "learning_rate": 8.499127418759388e-06, "loss": 0.5291, "step": 2833 }, { "epoch": 0.27581508515815084, "grad_norm": 1.4062980643641485, "learning_rate": 8.498001375377018e-06, "loss": 0.4645, "step": 2834 }, { "epoch": 0.2759124087591241, "grad_norm": 1.2961260919749351, "learning_rate": 8.496874984396101e-06, "loss": 0.2517, "step": 2835 }, { "epoch": 0.27600973236009735, "grad_norm": 1.4273972641674804, "learning_rate": 8.495748245928568e-06, "loss": 0.4705, "step": 2836 }, { "epoch": 0.27610705596107055, "grad_norm": 1.1525746776855315, "learning_rate": 8.494621160086383e-06, "loss": 0.3747, "step": 2837 }, { "epoch": 0.2762043795620438, "grad_norm": 1.6083708658269757, "learning_rate": 8.493493726981545e-06, "loss": 0.5754, "step": 2838 }, { "epoch": 0.27630170316301705, "grad_norm": 1.6380932846987073, "learning_rate": 8.492365946726087e-06, "loss": 0.4668, "step": 2839 }, { "epoch": 0.27639902676399025, "grad_norm": 1.3587028332396105, "learning_rate": 8.491237819432081e-06, "loss": 0.3466, "step": 2840 }, { "epoch": 0.2764963503649635, "grad_norm": 1.5812508624530597, "learning_rate": 8.490109345211625e-06, "loss": 0.628, "step": 2841 }, { "epoch": 0.27659367396593676, "grad_norm": 1.359461682943084, "learning_rate": 8.48898052417686e-06, "loss": 0.4799, "step": 2842 }, { "epoch": 0.27669099756690996, "grad_norm": 1.3773089875645015, "learning_rate": 8.487851356439953e-06, "loss": 0.3064, "step": 2843 }, { "epoch": 0.2767883211678832, "grad_norm": 1.445505572645753, "learning_rate": 8.486721842113114e-06, "loss": 0.4629, "step": 2844 }, { "epoch": 0.27688564476885646, "grad_norm": 2.1729540442826796, "learning_rate": 8.485591981308584e-06, "loss": 0.501, "step": 2845 }, { "epoch": 0.27698296836982966, "grad_norm": 1.2698072866971275, "learning_rate": 8.484461774138635e-06, "loss": 0.3354, "step": 2846 }, { "epoch": 0.2770802919708029, "grad_norm": 1.2270792461817257, "learning_rate": 8.483331220715578e-06, "loss": 0.2925, "step": 2847 }, { "epoch": 0.27717761557177617, "grad_norm": 1.4982940191444252, "learning_rate": 8.482200321151757e-06, "loss": 0.4372, "step": 2848 }, { "epoch": 0.27727493917274937, "grad_norm": 1.7962422459275051, "learning_rate": 8.48106907555955e-06, "loss": 0.2514, "step": 2849 }, { "epoch": 0.2773722627737226, "grad_norm": 1.1765428275481227, "learning_rate": 8.479937484051368e-06, "loss": 0.2466, "step": 2850 }, { "epoch": 0.2774695863746959, "grad_norm": 1.3671035304850088, "learning_rate": 8.47880554673966e-06, "loss": 0.4388, "step": 2851 }, { "epoch": 0.2775669099756691, "grad_norm": 1.584083262413021, "learning_rate": 8.477673263736908e-06, "loss": 0.3117, "step": 2852 }, { "epoch": 0.2776642335766423, "grad_norm": 1.6251518472003594, "learning_rate": 8.476540635155623e-06, "loss": 0.4661, "step": 2853 }, { "epoch": 0.2777615571776156, "grad_norm": 1.6392857489539867, "learning_rate": 8.475407661108361e-06, "loss": 0.354, "step": 2854 }, { "epoch": 0.27785888077858883, "grad_norm": 1.3195625296951223, "learning_rate": 8.474274341707702e-06, "loss": 0.3744, "step": 2855 }, { "epoch": 0.27795620437956203, "grad_norm": 1.34410915454318, "learning_rate": 8.473140677066267e-06, "loss": 0.4069, "step": 2856 }, { "epoch": 0.2780535279805353, "grad_norm": 1.0527413957181246, "learning_rate": 8.472006667296709e-06, "loss": 0.2776, "step": 2857 }, { "epoch": 0.27815085158150854, "grad_norm": 1.496471387248685, "learning_rate": 8.470872312511714e-06, "loss": 0.3642, "step": 2858 }, { "epoch": 0.27824817518248174, "grad_norm": 1.532429299396127, "learning_rate": 8.469737612824001e-06, "loss": 0.44, "step": 2859 }, { "epoch": 0.278345498783455, "grad_norm": 1.601112711944827, "learning_rate": 8.468602568346332e-06, "loss": 0.421, "step": 2860 }, { "epoch": 0.27844282238442825, "grad_norm": 1.5148720198103927, "learning_rate": 8.467467179191493e-06, "loss": 0.5258, "step": 2861 }, { "epoch": 0.27854014598540144, "grad_norm": 1.573048120862393, "learning_rate": 8.466331445472308e-06, "loss": 0.4507, "step": 2862 }, { "epoch": 0.2786374695863747, "grad_norm": 1.3938890789758775, "learning_rate": 8.465195367301639e-06, "loss": 0.3365, "step": 2863 }, { "epoch": 0.27873479318734795, "grad_norm": 1.6895380781567202, "learning_rate": 8.464058944792375e-06, "loss": 0.4132, "step": 2864 }, { "epoch": 0.27883211678832115, "grad_norm": 1.6880546647255488, "learning_rate": 8.462922178057444e-06, "loss": 0.2605, "step": 2865 }, { "epoch": 0.2789294403892944, "grad_norm": 1.491755717654464, "learning_rate": 8.46178506720981e-06, "loss": 0.3983, "step": 2866 }, { "epoch": 0.27902676399026766, "grad_norm": 1.5848666178901887, "learning_rate": 8.460647612362464e-06, "loss": 0.5101, "step": 2867 }, { "epoch": 0.27912408759124085, "grad_norm": 1.3442317187907376, "learning_rate": 8.459509813628437e-06, "loss": 0.458, "step": 2868 }, { "epoch": 0.2792214111922141, "grad_norm": 1.8095809186860319, "learning_rate": 8.458371671120795e-06, "loss": 0.382, "step": 2869 }, { "epoch": 0.27931873479318736, "grad_norm": 0.9909926300929587, "learning_rate": 8.457233184952635e-06, "loss": 0.2292, "step": 2870 }, { "epoch": 0.27941605839416056, "grad_norm": 1.7013118787018624, "learning_rate": 8.456094355237086e-06, "loss": 0.6861, "step": 2871 }, { "epoch": 0.2795133819951338, "grad_norm": 3.4293212695090025, "learning_rate": 8.45495518208732e-06, "loss": 0.3233, "step": 2872 }, { "epoch": 0.27961070559610707, "grad_norm": 1.4903797163776311, "learning_rate": 8.45381566561653e-06, "loss": 0.3231, "step": 2873 }, { "epoch": 0.27970802919708027, "grad_norm": 1.5615177882070261, "learning_rate": 8.452675805937956e-06, "loss": 0.4125, "step": 2874 }, { "epoch": 0.2798053527980535, "grad_norm": 1.4099046900170047, "learning_rate": 8.451535603164865e-06, "loss": 0.4967, "step": 2875 }, { "epoch": 0.2799026763990268, "grad_norm": 1.383217014263479, "learning_rate": 8.450395057410561e-06, "loss": 0.3411, "step": 2876 }, { "epoch": 0.28, "grad_norm": 1.2661588037606646, "learning_rate": 8.449254168788377e-06, "loss": 0.3734, "step": 2877 }, { "epoch": 0.2800973236009732, "grad_norm": 1.4107359648240771, "learning_rate": 8.448112937411689e-06, "loss": 0.4765, "step": 2878 }, { "epoch": 0.2801946472019465, "grad_norm": 1.567373989947911, "learning_rate": 8.446971363393897e-06, "loss": 0.5806, "step": 2879 }, { "epoch": 0.28029197080291973, "grad_norm": 1.5980994022663064, "learning_rate": 8.445829446848442e-06, "loss": 0.3765, "step": 2880 }, { "epoch": 0.28038929440389293, "grad_norm": 1.5582627635759285, "learning_rate": 8.444687187888798e-06, "loss": 0.3838, "step": 2881 }, { "epoch": 0.2804866180048662, "grad_norm": 2.097365147798996, "learning_rate": 8.44354458662847e-06, "loss": 0.6467, "step": 2882 }, { "epoch": 0.28058394160583944, "grad_norm": 1.5302257615618868, "learning_rate": 8.442401643181e-06, "loss": 0.4415, "step": 2883 }, { "epoch": 0.28068126520681264, "grad_norm": 1.1646338986978766, "learning_rate": 8.441258357659962e-06, "loss": 0.3176, "step": 2884 }, { "epoch": 0.2807785888077859, "grad_norm": 1.2287928718701633, "learning_rate": 8.440114730178968e-06, "loss": 0.4175, "step": 2885 }, { "epoch": 0.28087591240875914, "grad_norm": 1.4416072881006319, "learning_rate": 8.438970760851658e-06, "loss": 0.4838, "step": 2886 }, { "epoch": 0.28097323600973234, "grad_norm": 1.319870372533973, "learning_rate": 8.437826449791709e-06, "loss": 0.3421, "step": 2887 }, { "epoch": 0.2810705596107056, "grad_norm": 1.6261475252650914, "learning_rate": 8.436681797112833e-06, "loss": 0.5019, "step": 2888 }, { "epoch": 0.28116788321167885, "grad_norm": 1.6203143716652342, "learning_rate": 8.435536802928774e-06, "loss": 0.4282, "step": 2889 }, { "epoch": 0.28126520681265205, "grad_norm": 1.4127079920263665, "learning_rate": 8.434391467353312e-06, "loss": 0.4542, "step": 2890 }, { "epoch": 0.2813625304136253, "grad_norm": 1.1756885783532405, "learning_rate": 8.433245790500258e-06, "loss": 0.3563, "step": 2891 }, { "epoch": 0.28145985401459855, "grad_norm": 1.1824997482138238, "learning_rate": 8.43209977248346e-06, "loss": 0.3628, "step": 2892 }, { "epoch": 0.28155717761557175, "grad_norm": 1.4280724079623635, "learning_rate": 8.430953413416798e-06, "loss": 0.446, "step": 2893 }, { "epoch": 0.281654501216545, "grad_norm": 1.0710350994410123, "learning_rate": 8.429806713414188e-06, "loss": 0.2016, "step": 2894 }, { "epoch": 0.28175182481751826, "grad_norm": 1.453985226232095, "learning_rate": 8.428659672589574e-06, "loss": 0.4325, "step": 2895 }, { "epoch": 0.28184914841849146, "grad_norm": 1.3045306996673216, "learning_rate": 8.427512291056943e-06, "loss": 0.3838, "step": 2896 }, { "epoch": 0.2819464720194647, "grad_norm": 1.483337521636422, "learning_rate": 8.426364568930309e-06, "loss": 0.4212, "step": 2897 }, { "epoch": 0.28204379562043796, "grad_norm": 1.0901324802348065, "learning_rate": 8.425216506323721e-06, "loss": 0.2392, "step": 2898 }, { "epoch": 0.2821411192214112, "grad_norm": 1.3761268679827663, "learning_rate": 8.424068103351264e-06, "loss": 0.4459, "step": 2899 }, { "epoch": 0.2822384428223844, "grad_norm": 1.461105500215717, "learning_rate": 8.422919360127053e-06, "loss": 0.5018, "step": 2900 }, { "epoch": 0.28233576642335767, "grad_norm": 1.4314465150478046, "learning_rate": 8.421770276765245e-06, "loss": 0.4474, "step": 2901 }, { "epoch": 0.2824330900243309, "grad_norm": 1.6060806185106393, "learning_rate": 8.420620853380018e-06, "loss": 0.5798, "step": 2902 }, { "epoch": 0.2825304136253041, "grad_norm": 1.4468000025910832, "learning_rate": 8.419471090085596e-06, "loss": 0.5597, "step": 2903 }, { "epoch": 0.2826277372262774, "grad_norm": 5.585104457387235, "learning_rate": 8.41832098699623e-06, "loss": 0.3493, "step": 2904 }, { "epoch": 0.28272506082725063, "grad_norm": 1.3577816273786794, "learning_rate": 8.417170544226205e-06, "loss": 0.3262, "step": 2905 }, { "epoch": 0.2828223844282238, "grad_norm": 1.1546363912171016, "learning_rate": 8.416019761889845e-06, "loss": 0.3691, "step": 2906 }, { "epoch": 0.2829197080291971, "grad_norm": 1.3224407401265832, "learning_rate": 8.4148686401015e-06, "loss": 0.3079, "step": 2907 }, { "epoch": 0.28301703163017033, "grad_norm": 1.5947860641264806, "learning_rate": 8.413717178975558e-06, "loss": 0.277, "step": 2908 }, { "epoch": 0.28311435523114353, "grad_norm": 1.343045870800707, "learning_rate": 8.412565378626442e-06, "loss": 0.3448, "step": 2909 }, { "epoch": 0.2832116788321168, "grad_norm": 1.5567901041780798, "learning_rate": 8.411413239168609e-06, "loss": 0.3954, "step": 2910 }, { "epoch": 0.28330900243309004, "grad_norm": 1.5232536009297208, "learning_rate": 8.410260760716545e-06, "loss": 0.5103, "step": 2911 }, { "epoch": 0.28340632603406324, "grad_norm": 1.2493384040941995, "learning_rate": 8.409107943384773e-06, "loss": 0.3671, "step": 2912 }, { "epoch": 0.2835036496350365, "grad_norm": 1.246217249188392, "learning_rate": 8.407954787287848e-06, "loss": 0.4112, "step": 2913 }, { "epoch": 0.28360097323600975, "grad_norm": 1.2012340002353967, "learning_rate": 8.406801292540364e-06, "loss": 0.3769, "step": 2914 }, { "epoch": 0.28369829683698294, "grad_norm": 1.51749407168492, "learning_rate": 8.405647459256939e-06, "loss": 0.5515, "step": 2915 }, { "epoch": 0.2837956204379562, "grad_norm": 1.1589770762667257, "learning_rate": 8.404493287552232e-06, "loss": 0.2577, "step": 2916 }, { "epoch": 0.28389294403892945, "grad_norm": 1.5139932402052954, "learning_rate": 8.403338777540936e-06, "loss": 0.4796, "step": 2917 }, { "epoch": 0.28399026763990265, "grad_norm": 1.5544290759133006, "learning_rate": 8.402183929337774e-06, "loss": 0.4594, "step": 2918 }, { "epoch": 0.2840875912408759, "grad_norm": 1.3525572627526583, "learning_rate": 8.401028743057503e-06, "loss": 0.3978, "step": 2919 }, { "epoch": 0.28418491484184916, "grad_norm": 1.3610916698563846, "learning_rate": 8.399873218814916e-06, "loss": 0.4308, "step": 2920 }, { "epoch": 0.2842822384428224, "grad_norm": 1.2060322500759533, "learning_rate": 8.398717356724837e-06, "loss": 0.482, "step": 2921 }, { "epoch": 0.2843795620437956, "grad_norm": 1.152727586861314, "learning_rate": 8.397561156902126e-06, "loss": 0.3862, "step": 2922 }, { "epoch": 0.28447688564476886, "grad_norm": 1.6371195081735355, "learning_rate": 8.396404619461673e-06, "loss": 0.684, "step": 2923 }, { "epoch": 0.2845742092457421, "grad_norm": 1.4756480619833048, "learning_rate": 8.395247744518407e-06, "loss": 0.4432, "step": 2924 }, { "epoch": 0.2846715328467153, "grad_norm": 1.3495353534897125, "learning_rate": 8.394090532187286e-06, "loss": 0.4574, "step": 2925 }, { "epoch": 0.28476885644768857, "grad_norm": 1.361248347874279, "learning_rate": 8.392932982583301e-06, "loss": 0.3117, "step": 2926 }, { "epoch": 0.2848661800486618, "grad_norm": 1.5493409509214389, "learning_rate": 8.391775095821481e-06, "loss": 0.5949, "step": 2927 }, { "epoch": 0.284963503649635, "grad_norm": 1.5159424124979992, "learning_rate": 8.390616872016886e-06, "loss": 0.612, "step": 2928 }, { "epoch": 0.2850608272506083, "grad_norm": 0.9819694068633834, "learning_rate": 8.389458311284606e-06, "loss": 0.2407, "step": 2929 }, { "epoch": 0.2851581508515815, "grad_norm": 1.4338313849048412, "learning_rate": 8.388299413739772e-06, "loss": 0.504, "step": 2930 }, { "epoch": 0.2852554744525547, "grad_norm": 1.6033282710660985, "learning_rate": 8.387140179497541e-06, "loss": 0.4686, "step": 2931 }, { "epoch": 0.285352798053528, "grad_norm": 1.4339139409278308, "learning_rate": 8.38598060867311e-06, "loss": 0.5885, "step": 2932 }, { "epoch": 0.28545012165450123, "grad_norm": 1.6962944035069916, "learning_rate": 8.384820701381705e-06, "loss": 0.6325, "step": 2933 }, { "epoch": 0.28554744525547443, "grad_norm": 1.2380931242026982, "learning_rate": 8.383660457738585e-06, "loss": 0.3528, "step": 2934 }, { "epoch": 0.2856447688564477, "grad_norm": 1.4958548492045998, "learning_rate": 8.382499877859046e-06, "loss": 0.5261, "step": 2935 }, { "epoch": 0.28574209245742094, "grad_norm": 1.2493863506860636, "learning_rate": 8.381338961858417e-06, "loss": 0.295, "step": 2936 }, { "epoch": 0.28583941605839414, "grad_norm": 1.0264542939220365, "learning_rate": 8.380177709852055e-06, "loss": 0.2736, "step": 2937 }, { "epoch": 0.2859367396593674, "grad_norm": 1.3694559515073481, "learning_rate": 8.379016121955358e-06, "loss": 0.2437, "step": 2938 }, { "epoch": 0.28603406326034064, "grad_norm": 1.3958652644514353, "learning_rate": 8.377854198283751e-06, "loss": 0.5162, "step": 2939 }, { "epoch": 0.28613138686131384, "grad_norm": 1.3188642877167738, "learning_rate": 8.376691938952694e-06, "loss": 0.4403, "step": 2940 }, { "epoch": 0.2862287104622871, "grad_norm": 1.5563883463328907, "learning_rate": 8.375529344077686e-06, "loss": 0.3871, "step": 2941 }, { "epoch": 0.28632603406326035, "grad_norm": 1.7106139691477682, "learning_rate": 8.37436641377425e-06, "loss": 0.5998, "step": 2942 }, { "epoch": 0.2864233576642336, "grad_norm": 1.8227768617334648, "learning_rate": 8.373203148157953e-06, "loss": 0.4192, "step": 2943 }, { "epoch": 0.2865206812652068, "grad_norm": 1.3645142496496503, "learning_rate": 8.372039547344383e-06, "loss": 0.4301, "step": 2944 }, { "epoch": 0.28661800486618005, "grad_norm": 1.4644520960794265, "learning_rate": 8.370875611449173e-06, "loss": 0.4333, "step": 2945 }, { "epoch": 0.2867153284671533, "grad_norm": 1.3686778637415178, "learning_rate": 8.369711340587981e-06, "loss": 0.4735, "step": 2946 }, { "epoch": 0.2868126520681265, "grad_norm": 1.7752150982830557, "learning_rate": 8.368546734876499e-06, "loss": 0.605, "step": 2947 }, { "epoch": 0.28690997566909976, "grad_norm": 1.6349896239905135, "learning_rate": 8.36738179443046e-06, "loss": 0.4521, "step": 2948 }, { "epoch": 0.287007299270073, "grad_norm": 1.7001103309282906, "learning_rate": 8.366216519365623e-06, "loss": 0.5243, "step": 2949 }, { "epoch": 0.2871046228710462, "grad_norm": 1.3288526449094853, "learning_rate": 8.365050909797779e-06, "loss": 0.4226, "step": 2950 }, { "epoch": 0.28720194647201946, "grad_norm": 1.0609308885865543, "learning_rate": 8.36388496584276e-06, "loss": 0.2761, "step": 2951 }, { "epoch": 0.2872992700729927, "grad_norm": 1.3048762567541314, "learning_rate": 8.362718687616422e-06, "loss": 0.3166, "step": 2952 }, { "epoch": 0.2873965936739659, "grad_norm": 1.5602591658770568, "learning_rate": 8.361552075234664e-06, "loss": 0.1814, "step": 2953 }, { "epoch": 0.28749391727493917, "grad_norm": 1.261612878851385, "learning_rate": 8.360385128813409e-06, "loss": 0.3431, "step": 2954 }, { "epoch": 0.2875912408759124, "grad_norm": 1.6502840086679433, "learning_rate": 8.359217848468617e-06, "loss": 0.5688, "step": 2955 }, { "epoch": 0.2876885644768856, "grad_norm": 1.1758618501430975, "learning_rate": 8.358050234316283e-06, "loss": 0.376, "step": 2956 }, { "epoch": 0.2877858880778589, "grad_norm": 1.3748216513361973, "learning_rate": 8.356882286472433e-06, "loss": 0.4893, "step": 2957 }, { "epoch": 0.28788321167883213, "grad_norm": 1.490557754247365, "learning_rate": 8.35571400505313e-06, "loss": 0.4322, "step": 2958 }, { "epoch": 0.2879805352798053, "grad_norm": 1.2474734521766377, "learning_rate": 8.35454539017446e-06, "loss": 0.249, "step": 2959 }, { "epoch": 0.2880778588807786, "grad_norm": 1.3041956082790018, "learning_rate": 8.353376441952554e-06, "loss": 0.3629, "step": 2960 }, { "epoch": 0.28817518248175183, "grad_norm": 1.1813542799359134, "learning_rate": 8.352207160503572e-06, "loss": 0.2541, "step": 2961 }, { "epoch": 0.2882725060827251, "grad_norm": 1.6196703441196314, "learning_rate": 8.351037545943702e-06, "loss": 0.5863, "step": 2962 }, { "epoch": 0.2883698296836983, "grad_norm": 1.6020435634219072, "learning_rate": 8.34986759838917e-06, "loss": 0.5539, "step": 2963 }, { "epoch": 0.28846715328467154, "grad_norm": 1.6170521555116952, "learning_rate": 8.348697317956238e-06, "loss": 0.4234, "step": 2964 }, { "epoch": 0.2885644768856448, "grad_norm": 1.2300623631368495, "learning_rate": 8.347526704761193e-06, "loss": 0.2784, "step": 2965 }, { "epoch": 0.288661800486618, "grad_norm": 2.179168092375873, "learning_rate": 8.346355758920364e-06, "loss": 0.4561, "step": 2966 }, { "epoch": 0.28875912408759125, "grad_norm": 1.5135423174141494, "learning_rate": 8.345184480550104e-06, "loss": 0.3807, "step": 2967 }, { "epoch": 0.2888564476885645, "grad_norm": 1.7005351963186346, "learning_rate": 8.344012869766808e-06, "loss": 0.538, "step": 2968 }, { "epoch": 0.2889537712895377, "grad_norm": 1.2789157911351394, "learning_rate": 8.342840926686898e-06, "loss": 0.2623, "step": 2969 }, { "epoch": 0.28905109489051095, "grad_norm": 1.304761873055631, "learning_rate": 8.34166865142683e-06, "loss": 0.4219, "step": 2970 }, { "epoch": 0.2891484184914842, "grad_norm": 1.6192760894025877, "learning_rate": 8.340496044103095e-06, "loss": 0.4378, "step": 2971 }, { "epoch": 0.2892457420924574, "grad_norm": 1.4363442626245757, "learning_rate": 8.339323104832214e-06, "loss": 0.3819, "step": 2972 }, { "epoch": 0.28934306569343066, "grad_norm": 1.5094300127764981, "learning_rate": 8.338149833730742e-06, "loss": 0.2769, "step": 2973 }, { "epoch": 0.2894403892944039, "grad_norm": 1.6047897202306092, "learning_rate": 8.33697623091527e-06, "loss": 0.424, "step": 2974 }, { "epoch": 0.2895377128953771, "grad_norm": 1.3129110600868221, "learning_rate": 8.33580229650242e-06, "loss": 0.5053, "step": 2975 }, { "epoch": 0.28963503649635036, "grad_norm": 1.1812562932245452, "learning_rate": 8.334628030608845e-06, "loss": 0.3835, "step": 2976 }, { "epoch": 0.2897323600973236, "grad_norm": 1.2211203388582414, "learning_rate": 8.333453433351233e-06, "loss": 0.3531, "step": 2977 }, { "epoch": 0.2898296836982968, "grad_norm": 1.4620903484748373, "learning_rate": 8.332278504846303e-06, "loss": 0.4771, "step": 2978 }, { "epoch": 0.28992700729927007, "grad_norm": 0.9704255718501243, "learning_rate": 8.331103245210812e-06, "loss": 0.2618, "step": 2979 }, { "epoch": 0.2900243309002433, "grad_norm": 1.2827724622455963, "learning_rate": 8.329927654561544e-06, "loss": 0.3052, "step": 2980 }, { "epoch": 0.2901216545012165, "grad_norm": 1.378581411338256, "learning_rate": 8.328751733015316e-06, "loss": 0.3568, "step": 2981 }, { "epoch": 0.2902189781021898, "grad_norm": 1.769807570821765, "learning_rate": 8.327575480688985e-06, "loss": 0.3102, "step": 2982 }, { "epoch": 0.290316301703163, "grad_norm": 1.4326301683333176, "learning_rate": 8.32639889769943e-06, "loss": 0.3218, "step": 2983 }, { "epoch": 0.2904136253041363, "grad_norm": 1.5418816322088151, "learning_rate": 8.325221984163575e-06, "loss": 0.3257, "step": 2984 }, { "epoch": 0.2905109489051095, "grad_norm": 1.573484642436306, "learning_rate": 8.324044740198366e-06, "loss": 0.5401, "step": 2985 }, { "epoch": 0.29060827250608273, "grad_norm": 1.2270555416429247, "learning_rate": 8.322867165920789e-06, "loss": 0.3914, "step": 2986 }, { "epoch": 0.290705596107056, "grad_norm": 1.1838846887742434, "learning_rate": 8.321689261447858e-06, "loss": 0.3282, "step": 2987 }, { "epoch": 0.2908029197080292, "grad_norm": 1.5077214188811954, "learning_rate": 8.320511026896624e-06, "loss": 0.5279, "step": 2988 }, { "epoch": 0.29090024330900244, "grad_norm": 1.1784061774291985, "learning_rate": 8.31933246238417e-06, "loss": 0.403, "step": 2989 }, { "epoch": 0.2909975669099757, "grad_norm": 1.2176703537151474, "learning_rate": 8.318153568027607e-06, "loss": 0.4213, "step": 2990 }, { "epoch": 0.2910948905109489, "grad_norm": 1.3475262123063816, "learning_rate": 8.316974343944085e-06, "loss": 0.4059, "step": 2991 }, { "epoch": 0.29119221411192214, "grad_norm": 1.2398233047847593, "learning_rate": 8.315794790250784e-06, "loss": 0.2626, "step": 2992 }, { "epoch": 0.2912895377128954, "grad_norm": 1.3862498175549538, "learning_rate": 8.314614907064915e-06, "loss": 0.4535, "step": 2993 }, { "epoch": 0.2913868613138686, "grad_norm": 1.455622096437578, "learning_rate": 8.313434694503727e-06, "loss": 0.4067, "step": 2994 }, { "epoch": 0.29148418491484185, "grad_norm": 1.4755183973829757, "learning_rate": 8.312254152684496e-06, "loss": 0.6493, "step": 2995 }, { "epoch": 0.2915815085158151, "grad_norm": 1.0399713771806027, "learning_rate": 8.311073281724536e-06, "loss": 0.3051, "step": 2996 }, { "epoch": 0.2916788321167883, "grad_norm": 1.3151300509583979, "learning_rate": 8.309892081741186e-06, "loss": 0.3982, "step": 2997 }, { "epoch": 0.29177615571776155, "grad_norm": 1.376541833798208, "learning_rate": 8.308710552851826e-06, "loss": 0.4749, "step": 2998 }, { "epoch": 0.2918734793187348, "grad_norm": 1.2551786912554768, "learning_rate": 8.307528695173865e-06, "loss": 0.3118, "step": 2999 }, { "epoch": 0.291970802919708, "grad_norm": 2.1707038191553463, "learning_rate": 8.306346508824746e-06, "loss": 0.3438, "step": 3000 }, { "epoch": 0.29206812652068126, "grad_norm": 1.4299459588569998, "learning_rate": 8.30516399392194e-06, "loss": 0.4838, "step": 3001 }, { "epoch": 0.2921654501216545, "grad_norm": 1.378341342959643, "learning_rate": 8.303981150582958e-06, "loss": 0.5055, "step": 3002 }, { "epoch": 0.2922627737226277, "grad_norm": 1.4826508798742193, "learning_rate": 8.302797978925338e-06, "loss": 0.3737, "step": 3003 }, { "epoch": 0.29236009732360096, "grad_norm": 1.222513403789782, "learning_rate": 8.301614479066653e-06, "loss": 0.4587, "step": 3004 }, { "epoch": 0.2924574209245742, "grad_norm": 1.3819233250029228, "learning_rate": 8.300430651124508e-06, "loss": 0.4021, "step": 3005 }, { "epoch": 0.29255474452554747, "grad_norm": 1.2846536784172882, "learning_rate": 8.29924649521654e-06, "loss": 0.3609, "step": 3006 }, { "epoch": 0.29265206812652067, "grad_norm": 1.4274226525457885, "learning_rate": 8.298062011460419e-06, "loss": 0.5267, "step": 3007 }, { "epoch": 0.2927493917274939, "grad_norm": 1.4642655922839627, "learning_rate": 8.296877199973849e-06, "loss": 0.3499, "step": 3008 }, { "epoch": 0.2928467153284672, "grad_norm": 1.4317302181421974, "learning_rate": 8.295692060874568e-06, "loss": 0.4979, "step": 3009 }, { "epoch": 0.2929440389294404, "grad_norm": 1.3191877461185262, "learning_rate": 8.294506594280338e-06, "loss": 0.2835, "step": 3010 }, { "epoch": 0.29304136253041363, "grad_norm": 1.0943861065294986, "learning_rate": 8.293320800308964e-06, "loss": 0.2138, "step": 3011 }, { "epoch": 0.2931386861313869, "grad_norm": 1.2621219805575281, "learning_rate": 8.292134679078277e-06, "loss": 0.3027, "step": 3012 }, { "epoch": 0.2932360097323601, "grad_norm": 1.556172337566686, "learning_rate": 8.290948230706145e-06, "loss": 0.4462, "step": 3013 }, { "epoch": 0.29333333333333333, "grad_norm": 1.3363658374504028, "learning_rate": 8.289761455310463e-06, "loss": 0.373, "step": 3014 }, { "epoch": 0.2934306569343066, "grad_norm": 1.4458593210455408, "learning_rate": 8.288574353009164e-06, "loss": 0.5566, "step": 3015 }, { "epoch": 0.2935279805352798, "grad_norm": 1.5034274044899172, "learning_rate": 8.287386923920211e-06, "loss": 0.3837, "step": 3016 }, { "epoch": 0.29362530413625304, "grad_norm": 1.484769748600726, "learning_rate": 8.286199168161598e-06, "loss": 0.3173, "step": 3017 }, { "epoch": 0.2937226277372263, "grad_norm": 1.4336064725306121, "learning_rate": 8.285011085851353e-06, "loss": 0.4005, "step": 3018 }, { "epoch": 0.2938199513381995, "grad_norm": 1.3857231757141482, "learning_rate": 8.283822677107539e-06, "loss": 0.481, "step": 3019 }, { "epoch": 0.29391727493917275, "grad_norm": 1.4086307294395457, "learning_rate": 8.282633942048244e-06, "loss": 0.4181, "step": 3020 }, { "epoch": 0.294014598540146, "grad_norm": 1.4701075671537391, "learning_rate": 8.2814448807916e-06, "loss": 0.4041, "step": 3021 }, { "epoch": 0.2941119221411192, "grad_norm": 1.5925621393078395, "learning_rate": 8.28025549345576e-06, "loss": 0.3062, "step": 3022 }, { "epoch": 0.29420924574209245, "grad_norm": 1.6058911141553376, "learning_rate": 8.279065780158914e-06, "loss": 0.5534, "step": 3023 }, { "epoch": 0.2943065693430657, "grad_norm": 1.4134575830281486, "learning_rate": 8.277875741019289e-06, "loss": 0.5017, "step": 3024 }, { "epoch": 0.2944038929440389, "grad_norm": 1.6163740830610969, "learning_rate": 8.276685376155133e-06, "loss": 0.5513, "step": 3025 }, { "epoch": 0.29450121654501216, "grad_norm": 1.3415920762045879, "learning_rate": 8.275494685684739e-06, "loss": 0.4209, "step": 3026 }, { "epoch": 0.2945985401459854, "grad_norm": 1.699522776097275, "learning_rate": 8.274303669726427e-06, "loss": 0.2444, "step": 3027 }, { "epoch": 0.29469586374695866, "grad_norm": 1.3118143561432465, "learning_rate": 8.273112328398545e-06, "loss": 0.3282, "step": 3028 }, { "epoch": 0.29479318734793186, "grad_norm": 1.3608335365502384, "learning_rate": 8.271920661819479e-06, "loss": 0.4625, "step": 3029 }, { "epoch": 0.2948905109489051, "grad_norm": 1.320965035708582, "learning_rate": 8.270728670107645e-06, "loss": 0.4161, "step": 3030 }, { "epoch": 0.29498783454987837, "grad_norm": 1.2315684415049128, "learning_rate": 8.269536353381493e-06, "loss": 0.3264, "step": 3031 }, { "epoch": 0.29508515815085157, "grad_norm": 1.2397754210481065, "learning_rate": 8.268343711759505e-06, "loss": 0.3184, "step": 3032 }, { "epoch": 0.2951824817518248, "grad_norm": 1.4717261820272485, "learning_rate": 8.267150745360194e-06, "loss": 0.381, "step": 3033 }, { "epoch": 0.2952798053527981, "grad_norm": 1.7364842416546407, "learning_rate": 8.265957454302102e-06, "loss": 0.3639, "step": 3034 }, { "epoch": 0.2953771289537713, "grad_norm": 1.6249980192905973, "learning_rate": 8.264763838703813e-06, "loss": 0.5112, "step": 3035 }, { "epoch": 0.2954744525547445, "grad_norm": 1.682249094263979, "learning_rate": 8.263569898683934e-06, "loss": 0.4894, "step": 3036 }, { "epoch": 0.2955717761557178, "grad_norm": 1.9200248186176307, "learning_rate": 8.262375634361108e-06, "loss": 0.529, "step": 3037 }, { "epoch": 0.295669099756691, "grad_norm": 1.4426650259998133, "learning_rate": 8.261181045854011e-06, "loss": 0.5037, "step": 3038 }, { "epoch": 0.29576642335766423, "grad_norm": 1.6904227765149746, "learning_rate": 8.259986133281348e-06, "loss": 0.3632, "step": 3039 }, { "epoch": 0.2958637469586375, "grad_norm": 1.3863799205056755, "learning_rate": 8.25879089676186e-06, "loss": 0.4148, "step": 3040 }, { "epoch": 0.2959610705596107, "grad_norm": 1.627436205526306, "learning_rate": 8.257595336414317e-06, "loss": 0.4558, "step": 3041 }, { "epoch": 0.29605839416058394, "grad_norm": 1.3163567598814478, "learning_rate": 8.256399452357524e-06, "loss": 0.2713, "step": 3042 }, { "epoch": 0.2961557177615572, "grad_norm": 1.6072179171276018, "learning_rate": 8.255203244710316e-06, "loss": 0.353, "step": 3043 }, { "epoch": 0.2962530413625304, "grad_norm": 1.4217719575203627, "learning_rate": 8.254006713591559e-06, "loss": 0.3744, "step": 3044 }, { "epoch": 0.29635036496350364, "grad_norm": 1.9013012922141048, "learning_rate": 8.252809859120154e-06, "loss": 0.209, "step": 3045 }, { "epoch": 0.2964476885644769, "grad_norm": 1.390657831725977, "learning_rate": 8.251612681415035e-06, "loss": 0.3722, "step": 3046 }, { "epoch": 0.2965450121654501, "grad_norm": 1.4478686848472833, "learning_rate": 8.250415180595167e-06, "loss": 0.3869, "step": 3047 }, { "epoch": 0.29664233576642335, "grad_norm": 1.1443911522017596, "learning_rate": 8.249217356779544e-06, "loss": 0.3385, "step": 3048 }, { "epoch": 0.2967396593673966, "grad_norm": 1.7245119786652503, "learning_rate": 8.248019210087195e-06, "loss": 0.3023, "step": 3049 }, { "epoch": 0.29683698296836986, "grad_norm": 1.8030337728763741, "learning_rate": 8.24682074063718e-06, "loss": 0.3784, "step": 3050 }, { "epoch": 0.29693430656934305, "grad_norm": 1.299417141317702, "learning_rate": 8.245621948548593e-06, "loss": 0.2963, "step": 3051 }, { "epoch": 0.2970316301703163, "grad_norm": 1.3334468356141627, "learning_rate": 8.244422833940558e-06, "loss": 0.3671, "step": 3052 }, { "epoch": 0.29712895377128956, "grad_norm": 1.6168488226188178, "learning_rate": 8.24322339693223e-06, "loss": 0.5497, "step": 3053 }, { "epoch": 0.29722627737226276, "grad_norm": 1.49700230831562, "learning_rate": 8.242023637642802e-06, "loss": 0.4567, "step": 3054 }, { "epoch": 0.297323600973236, "grad_norm": 1.0494586888942983, "learning_rate": 8.24082355619149e-06, "loss": 0.2186, "step": 3055 }, { "epoch": 0.29742092457420927, "grad_norm": 1.372792205417397, "learning_rate": 8.239623152697553e-06, "loss": 0.5083, "step": 3056 }, { "epoch": 0.29751824817518246, "grad_norm": 1.266230497219453, "learning_rate": 8.238422427280269e-06, "loss": 0.461, "step": 3057 }, { "epoch": 0.2976155717761557, "grad_norm": 1.5041389582539588, "learning_rate": 8.237221380058959e-06, "loss": 0.3813, "step": 3058 }, { "epoch": 0.29771289537712897, "grad_norm": 1.4593593621079823, "learning_rate": 8.23602001115297e-06, "loss": 0.473, "step": 3059 }, { "epoch": 0.29781021897810217, "grad_norm": 1.3666083716931219, "learning_rate": 8.234818320681685e-06, "loss": 0.4822, "step": 3060 }, { "epoch": 0.2979075425790754, "grad_norm": 1.407870228183954, "learning_rate": 8.233616308764513e-06, "loss": 0.4012, "step": 3061 }, { "epoch": 0.2980048661800487, "grad_norm": 1.4404350668596586, "learning_rate": 8.232413975520903e-06, "loss": 0.5057, "step": 3062 }, { "epoch": 0.2981021897810219, "grad_norm": 1.3912456713229528, "learning_rate": 8.231211321070329e-06, "loss": 0.4578, "step": 3063 }, { "epoch": 0.29819951338199513, "grad_norm": 1.3191795228165797, "learning_rate": 8.2300083455323e-06, "loss": 0.3888, "step": 3064 }, { "epoch": 0.2982968369829684, "grad_norm": 1.4258248936492355, "learning_rate": 8.228805049026355e-06, "loss": 0.5108, "step": 3065 }, { "epoch": 0.2983941605839416, "grad_norm": 1.4850835614825084, "learning_rate": 8.22760143167207e-06, "loss": 0.5968, "step": 3066 }, { "epoch": 0.29849148418491483, "grad_norm": 1.2696050534436827, "learning_rate": 8.226397493589044e-06, "loss": 0.3328, "step": 3067 }, { "epoch": 0.2985888077858881, "grad_norm": 1.1993181516723008, "learning_rate": 8.225193234896918e-06, "loss": 0.2682, "step": 3068 }, { "epoch": 0.2986861313868613, "grad_norm": 1.3420953543565923, "learning_rate": 8.223988655715355e-06, "loss": 0.3865, "step": 3069 }, { "epoch": 0.29878345498783454, "grad_norm": 1.305913976862295, "learning_rate": 8.222783756164061e-06, "loss": 0.3551, "step": 3070 }, { "epoch": 0.2988807785888078, "grad_norm": 1.3385899852932626, "learning_rate": 8.221578536362764e-06, "loss": 0.4203, "step": 3071 }, { "epoch": 0.29897810218978105, "grad_norm": 1.189534251886867, "learning_rate": 8.220372996431228e-06, "loss": 0.2937, "step": 3072 }, { "epoch": 0.29907542579075425, "grad_norm": 1.5982329206910104, "learning_rate": 8.219167136489245e-06, "loss": 0.6064, "step": 3073 }, { "epoch": 0.2991727493917275, "grad_norm": 1.775024980718492, "learning_rate": 8.217960956656648e-06, "loss": 0.5517, "step": 3074 }, { "epoch": 0.29927007299270075, "grad_norm": 1.4818012612095348, "learning_rate": 8.216754457053291e-06, "loss": 0.3574, "step": 3075 }, { "epoch": 0.29936739659367395, "grad_norm": 1.5621403089409462, "learning_rate": 8.215547637799068e-06, "loss": 0.4108, "step": 3076 }, { "epoch": 0.2994647201946472, "grad_norm": 1.4983847186167278, "learning_rate": 8.214340499013899e-06, "loss": 0.4644, "step": 3077 }, { "epoch": 0.29956204379562046, "grad_norm": 1.5897848132407382, "learning_rate": 8.213133040817738e-06, "loss": 0.4894, "step": 3078 }, { "epoch": 0.29965936739659366, "grad_norm": 1.6354640621760643, "learning_rate": 8.211925263330573e-06, "loss": 0.4583, "step": 3079 }, { "epoch": 0.2997566909975669, "grad_norm": 1.4952024987397354, "learning_rate": 8.21071716667242e-06, "loss": 0.5976, "step": 3080 }, { "epoch": 0.29985401459854016, "grad_norm": 1.0095340308225043, "learning_rate": 8.20950875096333e-06, "loss": 0.2524, "step": 3081 }, { "epoch": 0.29995133819951336, "grad_norm": 1.4197678935056404, "learning_rate": 8.208300016323381e-06, "loss": 0.5514, "step": 3082 }, { "epoch": 0.3000486618004866, "grad_norm": 1.249287306745543, "learning_rate": 8.207090962872688e-06, "loss": 0.2683, "step": 3083 }, { "epoch": 0.30014598540145987, "grad_norm": 1.2420194980085992, "learning_rate": 8.205881590731394e-06, "loss": 0.3941, "step": 3084 }, { "epoch": 0.30024330900243307, "grad_norm": 1.0228818593574307, "learning_rate": 8.204671900019676e-06, "loss": 0.2158, "step": 3085 }, { "epoch": 0.3003406326034063, "grad_norm": 1.4988207950368069, "learning_rate": 8.203461890857743e-06, "loss": 0.4833, "step": 3086 }, { "epoch": 0.3004379562043796, "grad_norm": 1.3402746636459373, "learning_rate": 8.20225156336583e-06, "loss": 0.437, "step": 3087 }, { "epoch": 0.3005352798053528, "grad_norm": 1.3071666622302105, "learning_rate": 8.201040917664214e-06, "loss": 0.3667, "step": 3088 }, { "epoch": 0.300632603406326, "grad_norm": 2.001934665501785, "learning_rate": 8.199829953873192e-06, "loss": 0.346, "step": 3089 }, { "epoch": 0.3007299270072993, "grad_norm": 1.50451394225963, "learning_rate": 8.198618672113104e-06, "loss": 0.4897, "step": 3090 }, { "epoch": 0.3008272506082725, "grad_norm": 1.5127622960173581, "learning_rate": 8.197407072504309e-06, "loss": 0.4301, "step": 3091 }, { "epoch": 0.30092457420924573, "grad_norm": 1.409495275402236, "learning_rate": 8.196195155167211e-06, "loss": 0.4954, "step": 3092 }, { "epoch": 0.301021897810219, "grad_norm": 1.3458224438835962, "learning_rate": 8.194982920222233e-06, "loss": 0.5023, "step": 3093 }, { "epoch": 0.30111922141119224, "grad_norm": 1.484836707336815, "learning_rate": 8.19377036778984e-06, "loss": 0.4471, "step": 3094 }, { "epoch": 0.30121654501216544, "grad_norm": 1.4314600061658445, "learning_rate": 8.192557497990522e-06, "loss": 0.4519, "step": 3095 }, { "epoch": 0.3013138686131387, "grad_norm": 1.228152077257465, "learning_rate": 8.191344310944803e-06, "loss": 0.2338, "step": 3096 }, { "epoch": 0.30141119221411194, "grad_norm": 1.4025619039626473, "learning_rate": 8.19013080677324e-06, "loss": 0.3748, "step": 3097 }, { "epoch": 0.30150851581508514, "grad_norm": 1.535338102251852, "learning_rate": 8.188916985596415e-06, "loss": 0.3129, "step": 3098 }, { "epoch": 0.3016058394160584, "grad_norm": 1.7024230210298346, "learning_rate": 8.187702847534952e-06, "loss": 0.5525, "step": 3099 }, { "epoch": 0.30170316301703165, "grad_norm": 1.4950690283515784, "learning_rate": 8.186488392709495e-06, "loss": 0.5258, "step": 3100 }, { "epoch": 0.30180048661800485, "grad_norm": 1.589216178732189, "learning_rate": 8.18527362124073e-06, "loss": 0.5745, "step": 3101 }, { "epoch": 0.3018978102189781, "grad_norm": 1.5942675928105552, "learning_rate": 8.184058533249367e-06, "loss": 0.6344, "step": 3102 }, { "epoch": 0.30199513381995136, "grad_norm": 1.3981131065521017, "learning_rate": 8.18284312885615e-06, "loss": 0.3369, "step": 3103 }, { "epoch": 0.30209245742092455, "grad_norm": 1.6180199585993311, "learning_rate": 8.181627408181854e-06, "loss": 0.4014, "step": 3104 }, { "epoch": 0.3021897810218978, "grad_norm": 1.6338683004824879, "learning_rate": 8.180411371347288e-06, "loss": 0.4983, "step": 3105 }, { "epoch": 0.30228710462287106, "grad_norm": 1.5225224020676915, "learning_rate": 8.17919501847329e-06, "loss": 0.5016, "step": 3106 }, { "epoch": 0.30238442822384426, "grad_norm": 1.23190340238718, "learning_rate": 8.177978349680727e-06, "loss": 0.3644, "step": 3107 }, { "epoch": 0.3024817518248175, "grad_norm": 1.4496645177592962, "learning_rate": 8.176761365090503e-06, "loss": 0.526, "step": 3108 }, { "epoch": 0.30257907542579077, "grad_norm": 1.5209859048615393, "learning_rate": 8.17554406482355e-06, "loss": 0.3034, "step": 3109 }, { "epoch": 0.30267639902676396, "grad_norm": 1.4404359772108442, "learning_rate": 8.17432644900083e-06, "loss": 0.4735, "step": 3110 }, { "epoch": 0.3027737226277372, "grad_norm": 1.2693525922216498, "learning_rate": 8.173108517743343e-06, "loss": 0.4021, "step": 3111 }, { "epoch": 0.30287104622871047, "grad_norm": 1.3995736051817393, "learning_rate": 8.171890271172109e-06, "loss": 0.3084, "step": 3112 }, { "epoch": 0.3029683698296837, "grad_norm": 1.5690384436250255, "learning_rate": 8.17067170940819e-06, "loss": 0.4097, "step": 3113 }, { "epoch": 0.3030656934306569, "grad_norm": 1.270566641334736, "learning_rate": 8.169452832572676e-06, "loss": 0.3813, "step": 3114 }, { "epoch": 0.3031630170316302, "grad_norm": 1.1690990375599999, "learning_rate": 8.168233640786682e-06, "loss": 0.2898, "step": 3115 }, { "epoch": 0.30326034063260343, "grad_norm": 1.5367454476066444, "learning_rate": 8.167014134171367e-06, "loss": 0.4167, "step": 3116 }, { "epoch": 0.30335766423357663, "grad_norm": 1.113322849500334, "learning_rate": 8.165794312847912e-06, "loss": 0.3274, "step": 3117 }, { "epoch": 0.3034549878345499, "grad_norm": 1.4711999953076527, "learning_rate": 8.164574176937527e-06, "loss": 0.368, "step": 3118 }, { "epoch": 0.30355231143552314, "grad_norm": 1.4465621082621003, "learning_rate": 8.163353726561462e-06, "loss": 0.2719, "step": 3119 }, { "epoch": 0.30364963503649633, "grad_norm": 1.5224694722189016, "learning_rate": 8.162132961840994e-06, "loss": 0.3296, "step": 3120 }, { "epoch": 0.3037469586374696, "grad_norm": 1.3713377819635104, "learning_rate": 8.160911882897429e-06, "loss": 0.3064, "step": 3121 }, { "epoch": 0.30384428223844284, "grad_norm": 1.6461819951429466, "learning_rate": 8.159690489852108e-06, "loss": 0.3646, "step": 3122 }, { "epoch": 0.30394160583941604, "grad_norm": 1.4328493269552467, "learning_rate": 8.1584687828264e-06, "loss": 0.4363, "step": 3123 }, { "epoch": 0.3040389294403893, "grad_norm": 1.2811535124384867, "learning_rate": 8.157246761941708e-06, "loss": 0.4582, "step": 3124 }, { "epoch": 0.30413625304136255, "grad_norm": 1.2144846492785035, "learning_rate": 8.156024427319464e-06, "loss": 0.2413, "step": 3125 }, { "epoch": 0.30423357664233575, "grad_norm": 1.4112474441167293, "learning_rate": 8.154801779081135e-06, "loss": 0.4762, "step": 3126 }, { "epoch": 0.304330900243309, "grad_norm": 1.4641495751020401, "learning_rate": 8.153578817348213e-06, "loss": 0.4905, "step": 3127 }, { "epoch": 0.30442822384428225, "grad_norm": 1.7041758523831827, "learning_rate": 8.152355542242226e-06, "loss": 0.5396, "step": 3128 }, { "epoch": 0.30452554744525545, "grad_norm": 1.2349793608481423, "learning_rate": 8.151131953884728e-06, "loss": 0.3847, "step": 3129 }, { "epoch": 0.3046228710462287, "grad_norm": 1.4859954822671841, "learning_rate": 8.149908052397314e-06, "loss": 0.5907, "step": 3130 }, { "epoch": 0.30472019464720196, "grad_norm": 1.2693166698162108, "learning_rate": 8.148683837901599e-06, "loss": 0.2636, "step": 3131 }, { "epoch": 0.30481751824817516, "grad_norm": 1.1084842598244526, "learning_rate": 8.147459310519238e-06, "loss": 0.3103, "step": 3132 }, { "epoch": 0.3049148418491484, "grad_norm": 1.2151450124002459, "learning_rate": 8.146234470371908e-06, "loss": 0.2734, "step": 3133 }, { "epoch": 0.30501216545012166, "grad_norm": 1.3625938628176788, "learning_rate": 8.145009317581328e-06, "loss": 0.3757, "step": 3134 }, { "epoch": 0.3051094890510949, "grad_norm": 1.3367991206467007, "learning_rate": 8.143783852269239e-06, "loss": 0.3469, "step": 3135 }, { "epoch": 0.3052068126520681, "grad_norm": 1.5485997458565015, "learning_rate": 8.142558074557413e-06, "loss": 0.6068, "step": 3136 }, { "epoch": 0.30530413625304137, "grad_norm": 1.4327175362669387, "learning_rate": 8.141331984567661e-06, "loss": 0.4495, "step": 3137 }, { "epoch": 0.3054014598540146, "grad_norm": 1.4648525390361329, "learning_rate": 8.140105582421819e-06, "loss": 0.4855, "step": 3138 }, { "epoch": 0.3054987834549878, "grad_norm": 1.147269868566856, "learning_rate": 8.138878868241755e-06, "loss": 0.3671, "step": 3139 }, { "epoch": 0.3055961070559611, "grad_norm": 1.4274559741070532, "learning_rate": 8.13765184214937e-06, "loss": 0.4246, "step": 3140 }, { "epoch": 0.30569343065693433, "grad_norm": 1.3864373579762495, "learning_rate": 8.13642450426659e-06, "loss": 0.5252, "step": 3141 }, { "epoch": 0.3057907542579075, "grad_norm": 1.3508219371380046, "learning_rate": 8.135196854715382e-06, "loss": 0.4022, "step": 3142 }, { "epoch": 0.3058880778588808, "grad_norm": 1.3974501891292628, "learning_rate": 8.133968893617734e-06, "loss": 0.4903, "step": 3143 }, { "epoch": 0.30598540145985403, "grad_norm": 1.379672607479744, "learning_rate": 8.132740621095672e-06, "loss": 0.4389, "step": 3144 }, { "epoch": 0.30608272506082723, "grad_norm": 1.5338640282858476, "learning_rate": 8.131512037271248e-06, "loss": 0.5719, "step": 3145 }, { "epoch": 0.3061800486618005, "grad_norm": 1.754315640729903, "learning_rate": 8.130283142266549e-06, "loss": 0.4684, "step": 3146 }, { "epoch": 0.30627737226277374, "grad_norm": 1.3260485404955915, "learning_rate": 8.129053936203688e-06, "loss": 0.3967, "step": 3147 }, { "epoch": 0.30637469586374694, "grad_norm": 1.3926987348333701, "learning_rate": 8.127824419204818e-06, "loss": 0.3916, "step": 3148 }, { "epoch": 0.3064720194647202, "grad_norm": 1.4794110467900325, "learning_rate": 8.126594591392108e-06, "loss": 0.4127, "step": 3149 }, { "epoch": 0.30656934306569344, "grad_norm": 1.5256531082933278, "learning_rate": 8.125364452887775e-06, "loss": 0.4219, "step": 3150 }, { "epoch": 0.30666666666666664, "grad_norm": 1.2765687697220431, "learning_rate": 8.124134003814054e-06, "loss": 0.3482, "step": 3151 }, { "epoch": 0.3067639902676399, "grad_norm": 1.345009147300955, "learning_rate": 8.122903244293217e-06, "loss": 0.2419, "step": 3152 }, { "epoch": 0.30686131386861315, "grad_norm": 1.243655794496096, "learning_rate": 8.121672174447566e-06, "loss": 0.3132, "step": 3153 }, { "epoch": 0.30695863746958635, "grad_norm": 1.4526276096090445, "learning_rate": 8.120440794399432e-06, "loss": 0.5369, "step": 3154 }, { "epoch": 0.3070559610705596, "grad_norm": 1.4088986355103132, "learning_rate": 8.119209104271177e-06, "loss": 0.331, "step": 3155 }, { "epoch": 0.30715328467153286, "grad_norm": 1.4915113265208189, "learning_rate": 8.117977104185198e-06, "loss": 0.6195, "step": 3156 }, { "epoch": 0.3072506082725061, "grad_norm": 1.143472956889321, "learning_rate": 8.116744794263916e-06, "loss": 0.2632, "step": 3157 }, { "epoch": 0.3073479318734793, "grad_norm": 1.3575606240914238, "learning_rate": 8.11551217462979e-06, "loss": 0.3927, "step": 3158 }, { "epoch": 0.30744525547445256, "grad_norm": 1.2891794787417357, "learning_rate": 8.114279245405301e-06, "loss": 0.3766, "step": 3159 }, { "epoch": 0.3075425790754258, "grad_norm": 1.1671813045475161, "learning_rate": 8.113046006712973e-06, "loss": 0.3527, "step": 3160 }, { "epoch": 0.307639902676399, "grad_norm": 1.2346428689153102, "learning_rate": 8.111812458675348e-06, "loss": 0.456, "step": 3161 }, { "epoch": 0.30773722627737227, "grad_norm": 1.543619139526521, "learning_rate": 8.110578601415007e-06, "loss": 0.419, "step": 3162 }, { "epoch": 0.3078345498783455, "grad_norm": 1.5361457722305751, "learning_rate": 8.109344435054557e-06, "loss": 0.4477, "step": 3163 }, { "epoch": 0.3079318734793187, "grad_norm": 1.0948111699644958, "learning_rate": 8.108109959716641e-06, "loss": 0.3469, "step": 3164 }, { "epoch": 0.30802919708029197, "grad_norm": 1.4900262169803653, "learning_rate": 8.106875175523928e-06, "loss": 0.5066, "step": 3165 }, { "epoch": 0.3081265206812652, "grad_norm": 1.2143378476964652, "learning_rate": 8.105640082599118e-06, "loss": 0.4016, "step": 3166 }, { "epoch": 0.3082238442822384, "grad_norm": 1.227404068812886, "learning_rate": 8.104404681064943e-06, "loss": 0.3408, "step": 3167 }, { "epoch": 0.3083211678832117, "grad_norm": 1.273486832675327, "learning_rate": 8.10316897104417e-06, "loss": 0.3819, "step": 3168 }, { "epoch": 0.30841849148418493, "grad_norm": 1.390509439874599, "learning_rate": 8.101932952659586e-06, "loss": 0.5108, "step": 3169 }, { "epoch": 0.30851581508515813, "grad_norm": 1.1910701089910116, "learning_rate": 8.100696626034019e-06, "loss": 0.3579, "step": 3170 }, { "epoch": 0.3086131386861314, "grad_norm": 1.6328619260471173, "learning_rate": 8.099459991290324e-06, "loss": 0.666, "step": 3171 }, { "epoch": 0.30871046228710464, "grad_norm": 1.4085180007277236, "learning_rate": 8.09822304855138e-06, "loss": 0.3684, "step": 3172 }, { "epoch": 0.30880778588807783, "grad_norm": 1.5920530522626664, "learning_rate": 8.096985797940111e-06, "loss": 0.4499, "step": 3173 }, { "epoch": 0.3089051094890511, "grad_norm": 1.2895313219583247, "learning_rate": 8.09574823957946e-06, "loss": 0.4939, "step": 3174 }, { "epoch": 0.30900243309002434, "grad_norm": 1.5242111980147517, "learning_rate": 8.094510373592403e-06, "loss": 0.3223, "step": 3175 }, { "epoch": 0.30909975669099754, "grad_norm": 1.3628460645839475, "learning_rate": 8.093272200101946e-06, "loss": 0.507, "step": 3176 }, { "epoch": 0.3091970802919708, "grad_norm": 1.45716247785806, "learning_rate": 8.092033719231134e-06, "loss": 0.2011, "step": 3177 }, { "epoch": 0.30929440389294405, "grad_norm": 1.0824220688323085, "learning_rate": 8.090794931103026e-06, "loss": 0.2127, "step": 3178 }, { "epoch": 0.3093917274939173, "grad_norm": 1.3637285345889945, "learning_rate": 8.089555835840728e-06, "loss": 0.3567, "step": 3179 }, { "epoch": 0.3094890510948905, "grad_norm": 1.352681485055594, "learning_rate": 8.088316433567369e-06, "loss": 0.4403, "step": 3180 }, { "epoch": 0.30958637469586375, "grad_norm": 1.5330943463849844, "learning_rate": 8.087076724406106e-06, "loss": 0.3379, "step": 3181 }, { "epoch": 0.309683698296837, "grad_norm": 1.4102464472701088, "learning_rate": 8.08583670848013e-06, "loss": 0.5173, "step": 3182 }, { "epoch": 0.3097810218978102, "grad_norm": 1.3268465957799758, "learning_rate": 8.084596385912666e-06, "loss": 0.2684, "step": 3183 }, { "epoch": 0.30987834549878346, "grad_norm": 1.0612856713580272, "learning_rate": 8.083355756826962e-06, "loss": 0.2057, "step": 3184 }, { "epoch": 0.3099756690997567, "grad_norm": 1.2705392445129486, "learning_rate": 8.082114821346302e-06, "loss": 0.4234, "step": 3185 }, { "epoch": 0.3100729927007299, "grad_norm": 1.502946661443704, "learning_rate": 8.080873579593997e-06, "loss": 0.5134, "step": 3186 }, { "epoch": 0.31017031630170316, "grad_norm": 1.4788874331253061, "learning_rate": 8.079632031693392e-06, "loss": 0.6157, "step": 3187 }, { "epoch": 0.3102676399026764, "grad_norm": 1.0642136610663042, "learning_rate": 8.078390177767858e-06, "loss": 0.2667, "step": 3188 }, { "epoch": 0.3103649635036496, "grad_norm": 1.2694637339318475, "learning_rate": 8.0771480179408e-06, "loss": 0.4189, "step": 3189 }, { "epoch": 0.31046228710462287, "grad_norm": 1.40127369638598, "learning_rate": 8.075905552335652e-06, "loss": 0.6007, "step": 3190 }, { "epoch": 0.3105596107055961, "grad_norm": 1.4654952978073685, "learning_rate": 8.07466278107588e-06, "loss": 0.524, "step": 3191 }, { "epoch": 0.3106569343065693, "grad_norm": 1.468706008576981, "learning_rate": 8.073419704284977e-06, "loss": 0.5511, "step": 3192 }, { "epoch": 0.3107542579075426, "grad_norm": 1.3171427220237197, "learning_rate": 8.072176322086468e-06, "loss": 0.4903, "step": 3193 }, { "epoch": 0.31085158150851583, "grad_norm": 1.5864389312753313, "learning_rate": 8.07093263460391e-06, "loss": 0.7036, "step": 3194 }, { "epoch": 0.310948905109489, "grad_norm": 1.3892836370042843, "learning_rate": 8.06968864196089e-06, "loss": 0.4277, "step": 3195 }, { "epoch": 0.3110462287104623, "grad_norm": 1.1676123850477602, "learning_rate": 8.06844434428102e-06, "loss": 0.2693, "step": 3196 }, { "epoch": 0.31114355231143553, "grad_norm": 1.506360397408636, "learning_rate": 8.067199741687951e-06, "loss": 0.4425, "step": 3197 }, { "epoch": 0.31124087591240873, "grad_norm": 1.9902872035508865, "learning_rate": 8.065954834305359e-06, "loss": 0.4464, "step": 3198 }, { "epoch": 0.311338199513382, "grad_norm": 1.3738283887856941, "learning_rate": 8.06470962225695e-06, "loss": 0.2504, "step": 3199 }, { "epoch": 0.31143552311435524, "grad_norm": 1.5173657046475728, "learning_rate": 8.063464105666462e-06, "loss": 0.4145, "step": 3200 }, { "epoch": 0.3115328467153285, "grad_norm": 1.5348915862029098, "learning_rate": 8.062218284657663e-06, "loss": 0.4182, "step": 3201 }, { "epoch": 0.3116301703163017, "grad_norm": 1.6415893626851852, "learning_rate": 8.06097215935435e-06, "loss": 0.5928, "step": 3202 }, { "epoch": 0.31172749391727494, "grad_norm": 1.2746939048596826, "learning_rate": 8.059725729880354e-06, "loss": 0.2945, "step": 3203 }, { "epoch": 0.3118248175182482, "grad_norm": 1.354856369246625, "learning_rate": 8.05847899635953e-06, "loss": 0.445, "step": 3204 }, { "epoch": 0.3119221411192214, "grad_norm": 1.3006695307047205, "learning_rate": 8.057231958915767e-06, "loss": 0.3558, "step": 3205 }, { "epoch": 0.31201946472019465, "grad_norm": 1.294215405183638, "learning_rate": 8.05598461767299e-06, "loss": 0.3764, "step": 3206 }, { "epoch": 0.3121167883211679, "grad_norm": 1.4713454344723826, "learning_rate": 8.054736972755138e-06, "loss": 0.4945, "step": 3207 }, { "epoch": 0.3122141119221411, "grad_norm": 1.1708979284493786, "learning_rate": 8.053489024286198e-06, "loss": 0.2419, "step": 3208 }, { "epoch": 0.31231143552311436, "grad_norm": 1.2933153684751388, "learning_rate": 8.052240772390176e-06, "loss": 0.4624, "step": 3209 }, { "epoch": 0.3124087591240876, "grad_norm": 1.2538957446837722, "learning_rate": 8.050992217191114e-06, "loss": 0.3305, "step": 3210 }, { "epoch": 0.3125060827250608, "grad_norm": 1.5362890980077515, "learning_rate": 8.049743358813078e-06, "loss": 0.5151, "step": 3211 }, { "epoch": 0.31260340632603406, "grad_norm": 1.3913400504692353, "learning_rate": 8.04849419738017e-06, "loss": 0.355, "step": 3212 }, { "epoch": 0.3127007299270073, "grad_norm": 1.3089021233990763, "learning_rate": 8.04724473301652e-06, "loss": 0.262, "step": 3213 }, { "epoch": 0.3127980535279805, "grad_norm": 1.0542807465796966, "learning_rate": 8.045994965846288e-06, "loss": 0.3133, "step": 3214 }, { "epoch": 0.31289537712895377, "grad_norm": 1.4749126602435412, "learning_rate": 8.044744895993666e-06, "loss": 0.46, "step": 3215 }, { "epoch": 0.312992700729927, "grad_norm": 1.3746833003058674, "learning_rate": 8.043494523582871e-06, "loss": 0.427, "step": 3216 }, { "epoch": 0.3130900243309002, "grad_norm": 1.3205796823520726, "learning_rate": 8.042243848738153e-06, "loss": 0.3354, "step": 3217 }, { "epoch": 0.31318734793187347, "grad_norm": 1.2865057356076828, "learning_rate": 8.040992871583797e-06, "loss": 0.3941, "step": 3218 }, { "epoch": 0.3132846715328467, "grad_norm": 1.1248365389432082, "learning_rate": 8.039741592244108e-06, "loss": 0.2628, "step": 3219 }, { "epoch": 0.3133819951338199, "grad_norm": 1.4652698761705358, "learning_rate": 8.03849001084343e-06, "loss": 0.3564, "step": 3220 }, { "epoch": 0.3134793187347932, "grad_norm": 1.3953599364279132, "learning_rate": 8.037238127506128e-06, "loss": 0.4163, "step": 3221 }, { "epoch": 0.31357664233576643, "grad_norm": 1.2419892638751415, "learning_rate": 8.035985942356612e-06, "loss": 0.354, "step": 3222 }, { "epoch": 0.3136739659367397, "grad_norm": 1.715104485156596, "learning_rate": 8.034733455519303e-06, "loss": 0.2963, "step": 3223 }, { "epoch": 0.3137712895377129, "grad_norm": 1.470040424076559, "learning_rate": 8.033480667118667e-06, "loss": 0.4648, "step": 3224 }, { "epoch": 0.31386861313868614, "grad_norm": 1.4565317560800817, "learning_rate": 8.032227577279191e-06, "loss": 0.512, "step": 3225 }, { "epoch": 0.3139659367396594, "grad_norm": 1.3742041452053566, "learning_rate": 8.030974186125397e-06, "loss": 0.3956, "step": 3226 }, { "epoch": 0.3140632603406326, "grad_norm": 1.3788492769137004, "learning_rate": 8.029720493781838e-06, "loss": 0.4509, "step": 3227 }, { "epoch": 0.31416058394160584, "grad_norm": 1.3858125546461868, "learning_rate": 8.028466500373089e-06, "loss": 0.2106, "step": 3228 }, { "epoch": 0.3142579075425791, "grad_norm": 1.3840076863400683, "learning_rate": 8.027212206023762e-06, "loss": 0.3038, "step": 3229 }, { "epoch": 0.3143552311435523, "grad_norm": 1.3152632009702614, "learning_rate": 8.0259576108585e-06, "loss": 0.4801, "step": 3230 }, { "epoch": 0.31445255474452555, "grad_norm": 1.3788629368363385, "learning_rate": 8.024702715001968e-06, "loss": 0.4245, "step": 3231 }, { "epoch": 0.3145498783454988, "grad_norm": 1.814337226771454, "learning_rate": 8.023447518578868e-06, "loss": 0.5632, "step": 3232 }, { "epoch": 0.314647201946472, "grad_norm": 1.4232062511008752, "learning_rate": 8.02219202171393e-06, "loss": 0.3286, "step": 3233 }, { "epoch": 0.31474452554744525, "grad_norm": 1.280903932308868, "learning_rate": 8.020936224531912e-06, "loss": 0.3626, "step": 3234 }, { "epoch": 0.3148418491484185, "grad_norm": 1.0510702763807533, "learning_rate": 8.019680127157607e-06, "loss": 0.2524, "step": 3235 }, { "epoch": 0.3149391727493917, "grad_norm": 1.7478480050109064, "learning_rate": 8.018423729715832e-06, "loss": 0.4348, "step": 3236 }, { "epoch": 0.31503649635036496, "grad_norm": 1.5359033994100462, "learning_rate": 8.017167032331434e-06, "loss": 0.4124, "step": 3237 }, { "epoch": 0.3151338199513382, "grad_norm": 1.506288459888311, "learning_rate": 8.015910035129294e-06, "loss": 0.3261, "step": 3238 }, { "epoch": 0.3152311435523114, "grad_norm": 1.2745124165082422, "learning_rate": 8.01465273823432e-06, "loss": 0.464, "step": 3239 }, { "epoch": 0.31532846715328466, "grad_norm": 1.338919807585357, "learning_rate": 8.01339514177145e-06, "loss": 0.4172, "step": 3240 }, { "epoch": 0.3154257907542579, "grad_norm": 1.486780887173232, "learning_rate": 8.012137245865654e-06, "loss": 0.5408, "step": 3241 }, { "epoch": 0.31552311435523117, "grad_norm": 1.3714950278620026, "learning_rate": 8.010879050641927e-06, "loss": 0.3436, "step": 3242 }, { "epoch": 0.31562043795620437, "grad_norm": 1.631965892281063, "learning_rate": 8.009620556225298e-06, "loss": 0.4727, "step": 3243 }, { "epoch": 0.3157177615571776, "grad_norm": 1.6149761911532763, "learning_rate": 8.008361762740825e-06, "loss": 0.4924, "step": 3244 }, { "epoch": 0.3158150851581509, "grad_norm": 1.3425533377851226, "learning_rate": 8.007102670313596e-06, "loss": 0.3844, "step": 3245 }, { "epoch": 0.3159124087591241, "grad_norm": 1.5650571116791179, "learning_rate": 8.005843279068724e-06, "loss": 0.5109, "step": 3246 }, { "epoch": 0.31600973236009733, "grad_norm": 1.469317468630812, "learning_rate": 8.004583589131359e-06, "loss": 0.3981, "step": 3247 }, { "epoch": 0.3161070559610706, "grad_norm": 1.3520956586482695, "learning_rate": 8.003323600626675e-06, "loss": 0.3628, "step": 3248 }, { "epoch": 0.3162043795620438, "grad_norm": 1.306843877245721, "learning_rate": 8.002063313679881e-06, "loss": 0.3738, "step": 3249 }, { "epoch": 0.31630170316301703, "grad_norm": 1.2062559137545288, "learning_rate": 8.000802728416209e-06, "loss": 0.3603, "step": 3250 }, { "epoch": 0.3163990267639903, "grad_norm": 1.2270301850514787, "learning_rate": 7.999541844960926e-06, "loss": 0.3444, "step": 3251 }, { "epoch": 0.3164963503649635, "grad_norm": 1.6301989651574331, "learning_rate": 7.998280663439325e-06, "loss": 0.5442, "step": 3252 }, { "epoch": 0.31659367396593674, "grad_norm": 1.3633091002846736, "learning_rate": 7.997019183976732e-06, "loss": 0.4596, "step": 3253 }, { "epoch": 0.31669099756691, "grad_norm": 1.0930243128300028, "learning_rate": 7.9957574066985e-06, "loss": 0.259, "step": 3254 }, { "epoch": 0.3167883211678832, "grad_norm": 1.3445912322829077, "learning_rate": 7.994495331730014e-06, "loss": 0.438, "step": 3255 }, { "epoch": 0.31688564476885644, "grad_norm": 1.3651374487790005, "learning_rate": 7.993232959196687e-06, "loss": 0.4589, "step": 3256 }, { "epoch": 0.3169829683698297, "grad_norm": 1.3424961538987858, "learning_rate": 7.99197028922396e-06, "loss": 0.4367, "step": 3257 }, { "epoch": 0.3170802919708029, "grad_norm": 1.5932344837064665, "learning_rate": 7.990707321937308e-06, "loss": 0.6921, "step": 3258 }, { "epoch": 0.31717761557177615, "grad_norm": 1.475521709829228, "learning_rate": 7.989444057462228e-06, "loss": 0.4759, "step": 3259 }, { "epoch": 0.3172749391727494, "grad_norm": 1.2971398875872913, "learning_rate": 7.988180495924256e-06, "loss": 0.4588, "step": 3260 }, { "epoch": 0.3173722627737226, "grad_norm": 1.4756687426354647, "learning_rate": 7.986916637448953e-06, "loss": 0.4776, "step": 3261 }, { "epoch": 0.31746958637469586, "grad_norm": 1.6271934377592354, "learning_rate": 7.985652482161907e-06, "loss": 0.4979, "step": 3262 }, { "epoch": 0.3175669099756691, "grad_norm": 1.6597644298654206, "learning_rate": 7.984388030188739e-06, "loss": 0.6091, "step": 3263 }, { "epoch": 0.31766423357664236, "grad_norm": 1.3383482658500123, "learning_rate": 7.983123281655097e-06, "loss": 0.4371, "step": 3264 }, { "epoch": 0.31776155717761556, "grad_norm": 1.4591756386356707, "learning_rate": 7.981858236686661e-06, "loss": 0.4888, "step": 3265 }, { "epoch": 0.3178588807785888, "grad_norm": 1.1677482677553854, "learning_rate": 7.98059289540914e-06, "loss": 0.348, "step": 3266 }, { "epoch": 0.31795620437956207, "grad_norm": 1.280054884121347, "learning_rate": 7.97932725794827e-06, "loss": 0.3909, "step": 3267 }, { "epoch": 0.31805352798053527, "grad_norm": 1.2818245878680554, "learning_rate": 7.97806132442982e-06, "loss": 0.3432, "step": 3268 }, { "epoch": 0.3181508515815085, "grad_norm": 1.2511980990717368, "learning_rate": 7.976795094979586e-06, "loss": 0.398, "step": 3269 }, { "epoch": 0.3182481751824818, "grad_norm": 1.1398243641659185, "learning_rate": 7.975528569723391e-06, "loss": 0.3561, "step": 3270 }, { "epoch": 0.31834549878345497, "grad_norm": 1.4375913010503336, "learning_rate": 7.974261748787096e-06, "loss": 0.4341, "step": 3271 }, { "epoch": 0.3184428223844282, "grad_norm": 1.5232808350435216, "learning_rate": 7.972994632296583e-06, "loss": 0.443, "step": 3272 }, { "epoch": 0.3185401459854015, "grad_norm": 1.1586214953526035, "learning_rate": 7.971727220377765e-06, "loss": 0.3709, "step": 3273 }, { "epoch": 0.3186374695863747, "grad_norm": 1.5274740880588324, "learning_rate": 7.970459513156587e-06, "loss": 0.3699, "step": 3274 }, { "epoch": 0.31873479318734793, "grad_norm": 1.3981699767571285, "learning_rate": 7.969191510759021e-06, "loss": 0.3678, "step": 3275 }, { "epoch": 0.3188321167883212, "grad_norm": 1.934723248663663, "learning_rate": 7.96792321331107e-06, "loss": 0.5528, "step": 3276 }, { "epoch": 0.3189294403892944, "grad_norm": 1.3063523868424662, "learning_rate": 7.966654620938765e-06, "loss": 0.381, "step": 3277 }, { "epoch": 0.31902676399026764, "grad_norm": 1.4316009890701873, "learning_rate": 7.965385733768166e-06, "loss": 0.3462, "step": 3278 }, { "epoch": 0.3191240875912409, "grad_norm": 1.3925079285209303, "learning_rate": 7.964116551925365e-06, "loss": 0.3468, "step": 3279 }, { "epoch": 0.3192214111922141, "grad_norm": 1.5929020888752208, "learning_rate": 7.96284707553648e-06, "loss": 0.5416, "step": 3280 }, { "epoch": 0.31931873479318734, "grad_norm": 1.3866900373898063, "learning_rate": 7.961577304727659e-06, "loss": 0.3982, "step": 3281 }, { "epoch": 0.3194160583941606, "grad_norm": 1.4962340605356037, "learning_rate": 7.960307239625082e-06, "loss": 0.4023, "step": 3282 }, { "epoch": 0.3195133819951338, "grad_norm": 1.4057559523045156, "learning_rate": 7.959036880354955e-06, "loss": 0.495, "step": 3283 }, { "epoch": 0.31961070559610705, "grad_norm": 1.2622065590243314, "learning_rate": 7.957766227043514e-06, "loss": 0.3581, "step": 3284 }, { "epoch": 0.3197080291970803, "grad_norm": 1.5090453488845967, "learning_rate": 7.956495279817026e-06, "loss": 0.455, "step": 3285 }, { "epoch": 0.31980535279805355, "grad_norm": 1.3133879337401893, "learning_rate": 7.955224038801785e-06, "loss": 0.4625, "step": 3286 }, { "epoch": 0.31990267639902675, "grad_norm": 2.521541296377749, "learning_rate": 7.953952504124114e-06, "loss": 0.4415, "step": 3287 }, { "epoch": 0.32, "grad_norm": 1.3567937262050411, "learning_rate": 7.952680675910365e-06, "loss": 0.3309, "step": 3288 }, { "epoch": 0.32009732360097326, "grad_norm": 1.5421944908903493, "learning_rate": 7.951408554286926e-06, "loss": 0.4589, "step": 3289 }, { "epoch": 0.32019464720194646, "grad_norm": 1.5998274173642424, "learning_rate": 7.950136139380204e-06, "loss": 0.5359, "step": 3290 }, { "epoch": 0.3202919708029197, "grad_norm": 1.2725707304657317, "learning_rate": 7.948863431316639e-06, "loss": 0.3625, "step": 3291 }, { "epoch": 0.32038929440389297, "grad_norm": 1.4290851095226622, "learning_rate": 7.947590430222702e-06, "loss": 0.4872, "step": 3292 }, { "epoch": 0.32048661800486616, "grad_norm": 1.3420498316619087, "learning_rate": 7.946317136224894e-06, "loss": 0.2389, "step": 3293 }, { "epoch": 0.3205839416058394, "grad_norm": 1.5002712163507215, "learning_rate": 7.94504354944974e-06, "loss": 0.5157, "step": 3294 }, { "epoch": 0.32068126520681267, "grad_norm": 1.660535414536618, "learning_rate": 7.9437696700238e-06, "loss": 0.4267, "step": 3295 }, { "epoch": 0.32077858880778587, "grad_norm": 1.2370297819791147, "learning_rate": 7.942495498073657e-06, "loss": 0.3355, "step": 3296 }, { "epoch": 0.3208759124087591, "grad_norm": 1.1415276287275913, "learning_rate": 7.941221033725928e-06, "loss": 0.2944, "step": 3297 }, { "epoch": 0.3209732360097324, "grad_norm": 1.5554788387015477, "learning_rate": 7.939946277107258e-06, "loss": 0.4871, "step": 3298 }, { "epoch": 0.3210705596107056, "grad_norm": 1.2985241068062738, "learning_rate": 7.938671228344319e-06, "loss": 0.3143, "step": 3299 }, { "epoch": 0.32116788321167883, "grad_norm": 1.301901578297674, "learning_rate": 7.937395887563812e-06, "loss": 0.3965, "step": 3300 }, { "epoch": 0.3212652068126521, "grad_norm": 0.9529902878913864, "learning_rate": 7.936120254892471e-06, "loss": 0.3083, "step": 3301 }, { "epoch": 0.3213625304136253, "grad_norm": 1.442015067028423, "learning_rate": 7.934844330457056e-06, "loss": 0.4318, "step": 3302 }, { "epoch": 0.32145985401459853, "grad_norm": 0.8997835110854661, "learning_rate": 7.933568114384358e-06, "loss": 0.2885, "step": 3303 }, { "epoch": 0.3215571776155718, "grad_norm": 1.5293826180265608, "learning_rate": 7.932291606801192e-06, "loss": 0.5437, "step": 3304 }, { "epoch": 0.321654501216545, "grad_norm": 1.2264156375116992, "learning_rate": 7.931014807834405e-06, "loss": 0.4001, "step": 3305 }, { "epoch": 0.32175182481751824, "grad_norm": 1.260350527748902, "learning_rate": 7.929737717610878e-06, "loss": 0.3847, "step": 3306 }, { "epoch": 0.3218491484184915, "grad_norm": 1.1346472253273232, "learning_rate": 7.92846033625751e-06, "loss": 0.3766, "step": 3307 }, { "epoch": 0.32194647201946475, "grad_norm": 1.1035142206503785, "learning_rate": 7.927182663901241e-06, "loss": 0.369, "step": 3308 }, { "epoch": 0.32204379562043794, "grad_norm": 1.2980599562576733, "learning_rate": 7.92590470066903e-06, "loss": 0.3982, "step": 3309 }, { "epoch": 0.3221411192214112, "grad_norm": 1.1742301163722888, "learning_rate": 7.924626446687871e-06, "loss": 0.3423, "step": 3310 }, { "epoch": 0.32223844282238445, "grad_norm": 1.8345279558348917, "learning_rate": 7.923347902084784e-06, "loss": 0.3145, "step": 3311 }, { "epoch": 0.32233576642335765, "grad_norm": 1.562283157560816, "learning_rate": 7.92206906698682e-06, "loss": 0.4308, "step": 3312 }, { "epoch": 0.3224330900243309, "grad_norm": 1.568673716358362, "learning_rate": 7.920789941521053e-06, "loss": 0.7025, "step": 3313 }, { "epoch": 0.32253041362530416, "grad_norm": 1.409203500738135, "learning_rate": 7.9195105258146e-06, "loss": 0.459, "step": 3314 }, { "epoch": 0.32262773722627736, "grad_norm": 0.880538645206563, "learning_rate": 7.918230819994589e-06, "loss": 0.2786, "step": 3315 }, { "epoch": 0.3227250608272506, "grad_norm": 1.1906838149715093, "learning_rate": 7.916950824188188e-06, "loss": 0.2686, "step": 3316 }, { "epoch": 0.32282238442822386, "grad_norm": 1.5111358859487132, "learning_rate": 7.91567053852259e-06, "loss": 0.5147, "step": 3317 }, { "epoch": 0.32291970802919706, "grad_norm": 1.1464259378074475, "learning_rate": 7.914389963125018e-06, "loss": 0.2685, "step": 3318 }, { "epoch": 0.3230170316301703, "grad_norm": 1.581506486679798, "learning_rate": 7.913109098122726e-06, "loss": 0.5854, "step": 3319 }, { "epoch": 0.32311435523114357, "grad_norm": 1.3242645255760028, "learning_rate": 7.91182794364299e-06, "loss": 0.2315, "step": 3320 }, { "epoch": 0.32321167883211677, "grad_norm": 1.6094300344001513, "learning_rate": 7.910546499813125e-06, "loss": 0.4739, "step": 3321 }, { "epoch": 0.32330900243309, "grad_norm": 1.3446882210600364, "learning_rate": 7.909264766760462e-06, "loss": 0.4145, "step": 3322 }, { "epoch": 0.3234063260340633, "grad_norm": 1.282785844235514, "learning_rate": 7.907982744612373e-06, "loss": 0.4324, "step": 3323 }, { "epoch": 0.32350364963503647, "grad_norm": 1.8199182173411141, "learning_rate": 7.90670043349625e-06, "loss": 0.2738, "step": 3324 }, { "epoch": 0.3236009732360097, "grad_norm": 1.3779170997525898, "learning_rate": 7.90541783353952e-06, "loss": 0.4386, "step": 3325 }, { "epoch": 0.323698296836983, "grad_norm": 1.4651475108226641, "learning_rate": 7.904134944869631e-06, "loss": 0.2272, "step": 3326 }, { "epoch": 0.3237956204379562, "grad_norm": 1.4444649350514327, "learning_rate": 7.902851767614069e-06, "loss": 0.3631, "step": 3327 }, { "epoch": 0.32389294403892943, "grad_norm": 1.6625856822026597, "learning_rate": 7.901568301900343e-06, "loss": 0.3649, "step": 3328 }, { "epoch": 0.3239902676399027, "grad_norm": 1.4506728288423798, "learning_rate": 7.900284547855992e-06, "loss": 0.3231, "step": 3329 }, { "epoch": 0.32408759124087594, "grad_norm": 1.5932450335999997, "learning_rate": 7.899000505608583e-06, "loss": 0.6145, "step": 3330 }, { "epoch": 0.32418491484184914, "grad_norm": 1.466657731000094, "learning_rate": 7.89771617528571e-06, "loss": 0.4498, "step": 3331 }, { "epoch": 0.3242822384428224, "grad_norm": 1.1444340687397683, "learning_rate": 7.896431557015001e-06, "loss": 0.3953, "step": 3332 }, { "epoch": 0.32437956204379564, "grad_norm": 1.2696327521021862, "learning_rate": 7.895146650924106e-06, "loss": 0.3974, "step": 3333 }, { "epoch": 0.32447688564476884, "grad_norm": 1.0672959190242737, "learning_rate": 7.893861457140711e-06, "loss": 0.3147, "step": 3334 }, { "epoch": 0.3245742092457421, "grad_norm": 1.5878275615936528, "learning_rate": 7.892575975792524e-06, "loss": 0.5637, "step": 3335 }, { "epoch": 0.32467153284671535, "grad_norm": 1.3553067860868797, "learning_rate": 7.891290207007284e-06, "loss": 0.3979, "step": 3336 }, { "epoch": 0.32476885644768855, "grad_norm": 1.3585205023281657, "learning_rate": 7.890004150912758e-06, "loss": 0.5408, "step": 3337 }, { "epoch": 0.3248661800486618, "grad_norm": 1.3335333713837063, "learning_rate": 7.888717807636745e-06, "loss": 0.5097, "step": 3338 }, { "epoch": 0.32496350364963505, "grad_norm": 1.6084982696846433, "learning_rate": 7.887431177307067e-06, "loss": 0.6652, "step": 3339 }, { "epoch": 0.32506082725060825, "grad_norm": 1.5254713518221517, "learning_rate": 7.886144260051577e-06, "loss": 0.5413, "step": 3340 }, { "epoch": 0.3251581508515815, "grad_norm": 1.2319691547427678, "learning_rate": 7.88485705599816e-06, "loss": 0.3669, "step": 3341 }, { "epoch": 0.32525547445255476, "grad_norm": 1.8060436355287317, "learning_rate": 7.883569565274722e-06, "loss": 0.332, "step": 3342 }, { "epoch": 0.32535279805352796, "grad_norm": 1.2724687132520958, "learning_rate": 7.882281788009207e-06, "loss": 0.4156, "step": 3343 }, { "epoch": 0.3254501216545012, "grad_norm": 1.2678653056689784, "learning_rate": 7.880993724329578e-06, "loss": 0.34, "step": 3344 }, { "epoch": 0.32554744525547447, "grad_norm": 1.6551598614012555, "learning_rate": 7.879705374363831e-06, "loss": 0.4642, "step": 3345 }, { "epoch": 0.32564476885644766, "grad_norm": 1.3206920191183078, "learning_rate": 7.878416738239991e-06, "loss": 0.3755, "step": 3346 }, { "epoch": 0.3257420924574209, "grad_norm": 1.3858157374277495, "learning_rate": 7.877127816086109e-06, "loss": 0.3394, "step": 3347 }, { "epoch": 0.32583941605839417, "grad_norm": 2.0774474222482286, "learning_rate": 7.87583860803027e-06, "loss": 0.4237, "step": 3348 }, { "epoch": 0.32593673965936737, "grad_norm": 2.373250216339497, "learning_rate": 7.87454911420058e-06, "loss": 0.4854, "step": 3349 }, { "epoch": 0.3260340632603406, "grad_norm": 1.2888779016735306, "learning_rate": 7.873259334725177e-06, "loss": 0.2953, "step": 3350 }, { "epoch": 0.3261313868613139, "grad_norm": 1.4655511800886896, "learning_rate": 7.87196926973223e-06, "loss": 0.5252, "step": 3351 }, { "epoch": 0.32622871046228713, "grad_norm": 1.3179348636560488, "learning_rate": 7.870678919349929e-06, "loss": 0.3587, "step": 3352 }, { "epoch": 0.32632603406326033, "grad_norm": 1.5442264887171864, "learning_rate": 7.869388283706501e-06, "loss": 0.3808, "step": 3353 }, { "epoch": 0.3264233576642336, "grad_norm": 0.909176934030135, "learning_rate": 7.868097362930194e-06, "loss": 0.1721, "step": 3354 }, { "epoch": 0.32652068126520684, "grad_norm": 1.5955557971117078, "learning_rate": 7.866806157149291e-06, "loss": 0.5127, "step": 3355 }, { "epoch": 0.32661800486618003, "grad_norm": 1.260336198155953, "learning_rate": 7.865514666492096e-06, "loss": 0.2699, "step": 3356 }, { "epoch": 0.3267153284671533, "grad_norm": 1.5004384671553455, "learning_rate": 7.864222891086948e-06, "loss": 0.3168, "step": 3357 }, { "epoch": 0.32681265206812654, "grad_norm": 1.261076205541224, "learning_rate": 7.862930831062211e-06, "loss": 0.3678, "step": 3358 }, { "epoch": 0.32690997566909974, "grad_norm": 1.637405579152474, "learning_rate": 7.861638486546279e-06, "loss": 0.4613, "step": 3359 }, { "epoch": 0.327007299270073, "grad_norm": 1.3584995739472485, "learning_rate": 7.860345857667571e-06, "loss": 0.3229, "step": 3360 }, { "epoch": 0.32710462287104625, "grad_norm": 1.5428951197572305, "learning_rate": 7.859052944554537e-06, "loss": 0.4904, "step": 3361 }, { "epoch": 0.32720194647201944, "grad_norm": 1.2674356859582525, "learning_rate": 7.857759747335652e-06, "loss": 0.2942, "step": 3362 }, { "epoch": 0.3272992700729927, "grad_norm": 1.5802075836374327, "learning_rate": 7.856466266139426e-06, "loss": 0.2949, "step": 3363 }, { "epoch": 0.32739659367396595, "grad_norm": 1.8209546966028776, "learning_rate": 7.855172501094394e-06, "loss": 0.5036, "step": 3364 }, { "epoch": 0.32749391727493915, "grad_norm": 1.5829487718400805, "learning_rate": 7.853878452329113e-06, "loss": 0.3638, "step": 3365 }, { "epoch": 0.3275912408759124, "grad_norm": 1.439883288183698, "learning_rate": 7.852584119972178e-06, "loss": 0.4529, "step": 3366 }, { "epoch": 0.32768856447688566, "grad_norm": 1.188441154560626, "learning_rate": 7.851289504152201e-06, "loss": 0.1984, "step": 3367 }, { "epoch": 0.32778588807785886, "grad_norm": 1.3841398947965158, "learning_rate": 7.84999460499784e-06, "loss": 0.3274, "step": 3368 }, { "epoch": 0.3278832116788321, "grad_norm": 1.3784180119913427, "learning_rate": 7.848699422637757e-06, "loss": 0.5186, "step": 3369 }, { "epoch": 0.32798053527980536, "grad_norm": 1.6167829071938324, "learning_rate": 7.847403957200667e-06, "loss": 0.5905, "step": 3370 }, { "epoch": 0.32807785888077856, "grad_norm": 1.5035664756701093, "learning_rate": 7.846108208815292e-06, "loss": 0.3502, "step": 3371 }, { "epoch": 0.3281751824817518, "grad_norm": 1.3836098651173667, "learning_rate": 7.844812177610398e-06, "loss": 0.426, "step": 3372 }, { "epoch": 0.32827250608272507, "grad_norm": 1.33208856990685, "learning_rate": 7.843515863714766e-06, "loss": 0.38, "step": 3373 }, { "epoch": 0.3283698296836983, "grad_norm": 1.3222208170433662, "learning_rate": 7.842219267257216e-06, "loss": 0.33, "step": 3374 }, { "epoch": 0.3284671532846715, "grad_norm": 1.26421550866771, "learning_rate": 7.84092238836659e-06, "loss": 0.3682, "step": 3375 }, { "epoch": 0.3285644768856448, "grad_norm": 1.3594451535417975, "learning_rate": 7.839625227171762e-06, "loss": 0.4504, "step": 3376 }, { "epoch": 0.328661800486618, "grad_norm": 1.2038996790979526, "learning_rate": 7.838327783801627e-06, "loss": 0.3675, "step": 3377 }, { "epoch": 0.3287591240875912, "grad_norm": 1.2523761100181583, "learning_rate": 7.837030058385117e-06, "loss": 0.2582, "step": 3378 }, { "epoch": 0.3288564476885645, "grad_norm": 1.38623949822592, "learning_rate": 7.835732051051188e-06, "loss": 0.426, "step": 3379 }, { "epoch": 0.32895377128953773, "grad_norm": 1.6752196039887233, "learning_rate": 7.834433761928819e-06, "loss": 0.5995, "step": 3380 }, { "epoch": 0.32905109489051093, "grad_norm": 1.4769893614554368, "learning_rate": 7.833135191147027e-06, "loss": 0.4434, "step": 3381 }, { "epoch": 0.3291484184914842, "grad_norm": 1.3473793961690093, "learning_rate": 7.831836338834851e-06, "loss": 0.4064, "step": 3382 }, { "epoch": 0.32924574209245744, "grad_norm": 1.4564612861566626, "learning_rate": 7.830537205121354e-06, "loss": 0.5275, "step": 3383 }, { "epoch": 0.32934306569343064, "grad_norm": 1.4662477809046923, "learning_rate": 7.829237790135638e-06, "loss": 0.3745, "step": 3384 }, { "epoch": 0.3294403892944039, "grad_norm": 1.3883338656353856, "learning_rate": 7.827938094006822e-06, "loss": 0.4361, "step": 3385 }, { "epoch": 0.32953771289537714, "grad_norm": 1.2360062745222065, "learning_rate": 7.826638116864061e-06, "loss": 0.2936, "step": 3386 }, { "epoch": 0.32963503649635034, "grad_norm": 1.2636393287865908, "learning_rate": 7.82533785883653e-06, "loss": 0.3816, "step": 3387 }, { "epoch": 0.3297323600973236, "grad_norm": 1.5704874728825693, "learning_rate": 7.824037320053442e-06, "loss": 0.4946, "step": 3388 }, { "epoch": 0.32982968369829685, "grad_norm": 1.5450366878165769, "learning_rate": 7.822736500644028e-06, "loss": 0.5973, "step": 3389 }, { "epoch": 0.32992700729927005, "grad_norm": 1.570140468606553, "learning_rate": 7.821435400737555e-06, "loss": 0.6187, "step": 3390 }, { "epoch": 0.3300243309002433, "grad_norm": 1.404973531589098, "learning_rate": 7.820134020463311e-06, "loss": 0.4404, "step": 3391 }, { "epoch": 0.33012165450121655, "grad_norm": 1.4221866811013593, "learning_rate": 7.818832359950615e-06, "loss": 0.4375, "step": 3392 }, { "epoch": 0.3302189781021898, "grad_norm": 1.3514761483715907, "learning_rate": 7.817530419328815e-06, "loss": 0.4633, "step": 3393 }, { "epoch": 0.330316301703163, "grad_norm": 1.4122938191319212, "learning_rate": 7.816228198727287e-06, "loss": 0.4735, "step": 3394 }, { "epoch": 0.33041362530413626, "grad_norm": 1.2807472917541904, "learning_rate": 7.814925698275432e-06, "loss": 0.2993, "step": 3395 }, { "epoch": 0.3305109489051095, "grad_norm": 1.2374164437493267, "learning_rate": 7.813622918102679e-06, "loss": 0.4486, "step": 3396 }, { "epoch": 0.3306082725060827, "grad_norm": 1.4923726710921128, "learning_rate": 7.812319858338486e-06, "loss": 0.3976, "step": 3397 }, { "epoch": 0.33070559610705597, "grad_norm": 1.4652422146853137, "learning_rate": 7.811016519112342e-06, "loss": 0.509, "step": 3398 }, { "epoch": 0.3308029197080292, "grad_norm": 1.3523346564010856, "learning_rate": 7.80971290055376e-06, "loss": 0.4045, "step": 3399 }, { "epoch": 0.3309002433090024, "grad_norm": 1.4034359644709637, "learning_rate": 7.808409002792277e-06, "loss": 0.5252, "step": 3400 }, { "epoch": 0.33099756690997567, "grad_norm": 1.5977042267924388, "learning_rate": 7.807104825957466e-06, "loss": 0.5708, "step": 3401 }, { "epoch": 0.3310948905109489, "grad_norm": 1.194169039851455, "learning_rate": 7.805800370178925e-06, "loss": 0.2592, "step": 3402 }, { "epoch": 0.3311922141119221, "grad_norm": 1.3572077520529662, "learning_rate": 7.804495635586274e-06, "loss": 0.3838, "step": 3403 }, { "epoch": 0.3312895377128954, "grad_norm": 1.6103699950857218, "learning_rate": 7.80319062230917e-06, "loss": 0.2847, "step": 3404 }, { "epoch": 0.33138686131386863, "grad_norm": 1.1427751941761943, "learning_rate": 7.80188533047729e-06, "loss": 0.3235, "step": 3405 }, { "epoch": 0.33148418491484183, "grad_norm": 1.4205910051862616, "learning_rate": 7.800579760220343e-06, "loss": 0.4415, "step": 3406 }, { "epoch": 0.3315815085158151, "grad_norm": 1.239833112068907, "learning_rate": 7.799273911668062e-06, "loss": 0.296, "step": 3407 }, { "epoch": 0.33167883211678834, "grad_norm": 1.382006652854662, "learning_rate": 7.797967784950215e-06, "loss": 0.5129, "step": 3408 }, { "epoch": 0.33177615571776153, "grad_norm": 1.3910482812533478, "learning_rate": 7.796661380196587e-06, "loss": 0.4355, "step": 3409 }, { "epoch": 0.3318734793187348, "grad_norm": 1.3166393673557537, "learning_rate": 7.795354697537e-06, "loss": 0.3357, "step": 3410 }, { "epoch": 0.33197080291970804, "grad_norm": 1.3540344564992455, "learning_rate": 7.794047737101298e-06, "loss": 0.2772, "step": 3411 }, { "epoch": 0.33206812652068124, "grad_norm": 1.5732997739305445, "learning_rate": 7.792740499019354e-06, "loss": 0.368, "step": 3412 }, { "epoch": 0.3321654501216545, "grad_norm": 1.00398349093736, "learning_rate": 7.791432983421071e-06, "loss": 0.2794, "step": 3413 }, { "epoch": 0.33226277372262775, "grad_norm": 1.5831231764140208, "learning_rate": 7.790125190436378e-06, "loss": 0.399, "step": 3414 }, { "epoch": 0.332360097323601, "grad_norm": 1.301291609070449, "learning_rate": 7.788817120195228e-06, "loss": 0.4975, "step": 3415 }, { "epoch": 0.3324574209245742, "grad_norm": 3.04667201221868, "learning_rate": 7.787508772827606e-06, "loss": 0.3034, "step": 3416 }, { "epoch": 0.33255474452554745, "grad_norm": 1.3711038355442808, "learning_rate": 7.786200148463525e-06, "loss": 0.4023, "step": 3417 }, { "epoch": 0.3326520681265207, "grad_norm": 1.4460757328108422, "learning_rate": 7.784891247233025e-06, "loss": 0.5218, "step": 3418 }, { "epoch": 0.3327493917274939, "grad_norm": 1.5174415949182483, "learning_rate": 7.783582069266167e-06, "loss": 0.5401, "step": 3419 }, { "epoch": 0.33284671532846716, "grad_norm": 1.170900270080405, "learning_rate": 7.78227261469305e-06, "loss": 0.3093, "step": 3420 }, { "epoch": 0.3329440389294404, "grad_norm": 1.1117784496155982, "learning_rate": 7.78096288364379e-06, "loss": 0.2159, "step": 3421 }, { "epoch": 0.3330413625304136, "grad_norm": 1.385907002729564, "learning_rate": 7.779652876248541e-06, "loss": 0.4513, "step": 3422 }, { "epoch": 0.33313868613138686, "grad_norm": 1.091845134289533, "learning_rate": 7.778342592637477e-06, "loss": 0.249, "step": 3423 }, { "epoch": 0.3332360097323601, "grad_norm": 1.1987125682853903, "learning_rate": 7.7770320329408e-06, "loss": 0.4583, "step": 3424 }, { "epoch": 0.3333333333333333, "grad_norm": 1.150260309114711, "learning_rate": 7.775721197288746e-06, "loss": 0.4145, "step": 3425 }, { "epoch": 0.33343065693430657, "grad_norm": 1.1244746146994131, "learning_rate": 7.77441008581157e-06, "loss": 0.2334, "step": 3426 }, { "epoch": 0.3335279805352798, "grad_norm": 2.372002969337908, "learning_rate": 7.773098698639558e-06, "loss": 0.3346, "step": 3427 }, { "epoch": 0.333625304136253, "grad_norm": 1.460305633169593, "learning_rate": 7.771787035903023e-06, "loss": 0.5202, "step": 3428 }, { "epoch": 0.3337226277372263, "grad_norm": 1.4552706392676258, "learning_rate": 7.77047509773231e-06, "loss": 0.3249, "step": 3429 }, { "epoch": 0.3338199513381995, "grad_norm": 26.423500051667432, "learning_rate": 7.769162884257778e-06, "loss": 0.3919, "step": 3430 }, { "epoch": 0.3339172749391727, "grad_norm": 1.3364724766772538, "learning_rate": 7.767850395609832e-06, "loss": 0.4882, "step": 3431 }, { "epoch": 0.334014598540146, "grad_norm": 1.162494089255124, "learning_rate": 7.766537631918888e-06, "loss": 0.4172, "step": 3432 }, { "epoch": 0.33411192214111923, "grad_norm": 1.4584273535075323, "learning_rate": 7.765224593315402e-06, "loss": 0.5721, "step": 3433 }, { "epoch": 0.33420924574209243, "grad_norm": 1.39936657097592, "learning_rate": 7.763911279929848e-06, "loss": 0.4454, "step": 3434 }, { "epoch": 0.3343065693430657, "grad_norm": 1.5774498430067907, "learning_rate": 7.76259769189273e-06, "loss": 0.6756, "step": 3435 }, { "epoch": 0.33440389294403894, "grad_norm": 1.4346477573335101, "learning_rate": 7.761283829334583e-06, "loss": 0.4939, "step": 3436 }, { "epoch": 0.3345012165450122, "grad_norm": 1.2329772568374064, "learning_rate": 7.759969692385963e-06, "loss": 0.3576, "step": 3437 }, { "epoch": 0.3345985401459854, "grad_norm": 86.06815068595351, "learning_rate": 7.75865528117746e-06, "loss": 0.7983, "step": 3438 }, { "epoch": 0.33469586374695864, "grad_norm": 1.4916748444459116, "learning_rate": 7.757340595839686e-06, "loss": 0.3408, "step": 3439 }, { "epoch": 0.3347931873479319, "grad_norm": 1.204864631425379, "learning_rate": 7.756025636503281e-06, "loss": 0.2893, "step": 3440 }, { "epoch": 0.3348905109489051, "grad_norm": 1.1483309949418294, "learning_rate": 7.754710403298915e-06, "loss": 0.307, "step": 3441 }, { "epoch": 0.33498783454987835, "grad_norm": 1.3801437746700074, "learning_rate": 7.753394896357283e-06, "loss": 0.5086, "step": 3442 }, { "epoch": 0.3350851581508516, "grad_norm": 1.4670528589774587, "learning_rate": 7.752079115809105e-06, "loss": 0.5494, "step": 3443 }, { "epoch": 0.3351824817518248, "grad_norm": 1.2268331435647832, "learning_rate": 7.750763061785139e-06, "loss": 0.3421, "step": 3444 }, { "epoch": 0.33527980535279805, "grad_norm": 1.117498287907938, "learning_rate": 7.749446734416153e-06, "loss": 0.3583, "step": 3445 }, { "epoch": 0.3353771289537713, "grad_norm": 1.6628933950216975, "learning_rate": 7.748130133832956e-06, "loss": 0.4265, "step": 3446 }, { "epoch": 0.3354744525547445, "grad_norm": 1.4371941282513903, "learning_rate": 7.746813260166379e-06, "loss": 0.5499, "step": 3447 }, { "epoch": 0.33557177615571776, "grad_norm": 1.4139636094726638, "learning_rate": 7.74549611354728e-06, "loss": 0.5113, "step": 3448 }, { "epoch": 0.335669099756691, "grad_norm": 1.3607040572953095, "learning_rate": 7.744178694106545e-06, "loss": 0.3662, "step": 3449 }, { "epoch": 0.3357664233576642, "grad_norm": 1.3497420382405303, "learning_rate": 7.742861001975086e-06, "loss": 0.37, "step": 3450 }, { "epoch": 0.33586374695863747, "grad_norm": 1.1583812763163044, "learning_rate": 7.741543037283844e-06, "loss": 0.2328, "step": 3451 }, { "epoch": 0.3359610705596107, "grad_norm": 1.233691835278808, "learning_rate": 7.740224800163783e-06, "loss": 0.3023, "step": 3452 }, { "epoch": 0.3360583941605839, "grad_norm": 1.770041486932794, "learning_rate": 7.738906290745902e-06, "loss": 0.4559, "step": 3453 }, { "epoch": 0.33615571776155717, "grad_norm": 1.6249038227669963, "learning_rate": 7.737587509161218e-06, "loss": 0.3305, "step": 3454 }, { "epoch": 0.3362530413625304, "grad_norm": 1.7123859840919058, "learning_rate": 7.73626845554078e-06, "loss": 0.8223, "step": 3455 }, { "epoch": 0.3363503649635036, "grad_norm": 1.465565821382168, "learning_rate": 7.734949130015665e-06, "loss": 0.3951, "step": 3456 }, { "epoch": 0.3364476885644769, "grad_norm": 1.3358312642650005, "learning_rate": 7.733629532716974e-06, "loss": 0.3988, "step": 3457 }, { "epoch": 0.33654501216545013, "grad_norm": 1.4614411980006665, "learning_rate": 7.732309663775834e-06, "loss": 0.4447, "step": 3458 }, { "epoch": 0.3366423357664234, "grad_norm": 1.5007446348141111, "learning_rate": 7.730989523323405e-06, "loss": 0.5075, "step": 3459 }, { "epoch": 0.3367396593673966, "grad_norm": 1.378374467272079, "learning_rate": 7.72966911149087e-06, "loss": 0.3713, "step": 3460 }, { "epoch": 0.33683698296836984, "grad_norm": 1.264554548276351, "learning_rate": 7.728348428409434e-06, "loss": 0.4239, "step": 3461 }, { "epoch": 0.3369343065693431, "grad_norm": 1.3654939025524866, "learning_rate": 7.72702747421034e-06, "loss": 0.3861, "step": 3462 }, { "epoch": 0.3370316301703163, "grad_norm": 1.456076628879786, "learning_rate": 7.72570624902485e-06, "loss": 0.4548, "step": 3463 }, { "epoch": 0.33712895377128954, "grad_norm": 1.5979907891076075, "learning_rate": 7.724384752984253e-06, "loss": 0.5395, "step": 3464 }, { "epoch": 0.3372262773722628, "grad_norm": 1.479690145713753, "learning_rate": 7.723062986219871e-06, "loss": 0.4676, "step": 3465 }, { "epoch": 0.337323600973236, "grad_norm": 1.5794398807050158, "learning_rate": 7.721740948863044e-06, "loss": 0.6383, "step": 3466 }, { "epoch": 0.33742092457420925, "grad_norm": 1.5077566027780562, "learning_rate": 7.720418641045147e-06, "loss": 0.449, "step": 3467 }, { "epoch": 0.3375182481751825, "grad_norm": 1.536687422230877, "learning_rate": 7.719096062897578e-06, "loss": 0.3885, "step": 3468 }, { "epoch": 0.3376155717761557, "grad_norm": 1.4136867972495795, "learning_rate": 7.717773214551762e-06, "loss": 0.4262, "step": 3469 }, { "epoch": 0.33771289537712895, "grad_norm": 1.7521028146405362, "learning_rate": 7.71645009613915e-06, "loss": 0.3553, "step": 3470 }, { "epoch": 0.3378102189781022, "grad_norm": 1.28472614917785, "learning_rate": 7.715126707791223e-06, "loss": 0.5044, "step": 3471 }, { "epoch": 0.3379075425790754, "grad_norm": 1.5285157685020219, "learning_rate": 7.713803049639485e-06, "loss": 0.3067, "step": 3472 }, { "epoch": 0.33800486618004866, "grad_norm": 1.301311213128161, "learning_rate": 7.712479121815473e-06, "loss": 0.4036, "step": 3473 }, { "epoch": 0.3381021897810219, "grad_norm": 1.4164505494518185, "learning_rate": 7.711154924450741e-06, "loss": 0.3674, "step": 3474 }, { "epoch": 0.3381995133819951, "grad_norm": 1.2252498333021546, "learning_rate": 7.709830457676876e-06, "loss": 0.273, "step": 3475 }, { "epoch": 0.33829683698296836, "grad_norm": 1.4131843411362779, "learning_rate": 7.708505721625497e-06, "loss": 0.565, "step": 3476 }, { "epoch": 0.3383941605839416, "grad_norm": 1.6947767404353455, "learning_rate": 7.707180716428237e-06, "loss": 0.4248, "step": 3477 }, { "epoch": 0.3384914841849148, "grad_norm": 1.2913551057356365, "learning_rate": 7.705855442216766e-06, "loss": 0.3537, "step": 3478 }, { "epoch": 0.33858880778588807, "grad_norm": 1.150600446597589, "learning_rate": 7.704529899122776e-06, "loss": 0.3311, "step": 3479 }, { "epoch": 0.3386861313868613, "grad_norm": 1.2021311829209522, "learning_rate": 7.703204087277989e-06, "loss": 0.4163, "step": 3480 }, { "epoch": 0.3387834549878346, "grad_norm": 1.256321657329123, "learning_rate": 7.70187800681415e-06, "loss": 0.3609, "step": 3481 }, { "epoch": 0.3388807785888078, "grad_norm": 1.3378074898611492, "learning_rate": 7.70055165786303e-06, "loss": 0.3365, "step": 3482 }, { "epoch": 0.338978102189781, "grad_norm": 1.170985634605712, "learning_rate": 7.699225040556435e-06, "loss": 0.2524, "step": 3483 }, { "epoch": 0.3390754257907543, "grad_norm": 1.3653491057947706, "learning_rate": 7.697898155026188e-06, "loss": 0.282, "step": 3484 }, { "epoch": 0.3391727493917275, "grad_norm": 1.3228454670807173, "learning_rate": 7.696571001404143e-06, "loss": 0.4489, "step": 3485 }, { "epoch": 0.33927007299270073, "grad_norm": 1.568456967406488, "learning_rate": 7.695243579822179e-06, "loss": 0.672, "step": 3486 }, { "epoch": 0.339367396593674, "grad_norm": 1.5380107558510523, "learning_rate": 7.693915890412205e-06, "loss": 0.4099, "step": 3487 }, { "epoch": 0.3394647201946472, "grad_norm": 1.522434941547624, "learning_rate": 7.692587933306152e-06, "loss": 0.3895, "step": 3488 }, { "epoch": 0.33956204379562044, "grad_norm": 1.6424350744273293, "learning_rate": 7.691259708635983e-06, "loss": 0.4547, "step": 3489 }, { "epoch": 0.3396593673965937, "grad_norm": 1.1285195925950828, "learning_rate": 7.689931216533682e-06, "loss": 0.3205, "step": 3490 }, { "epoch": 0.3397566909975669, "grad_norm": 1.2211335178765037, "learning_rate": 7.68860245713126e-06, "loss": 0.2849, "step": 3491 }, { "epoch": 0.33985401459854014, "grad_norm": 1.3440619705365895, "learning_rate": 7.687273430560763e-06, "loss": 0.3493, "step": 3492 }, { "epoch": 0.3399513381995134, "grad_norm": 1.9270777796759784, "learning_rate": 7.685944136954252e-06, "loss": 0.3207, "step": 3493 }, { "epoch": 0.3400486618004866, "grad_norm": 1.3294429746967642, "learning_rate": 7.684614576443821e-06, "loss": 0.3215, "step": 3494 }, { "epoch": 0.34014598540145985, "grad_norm": 1.299183824061712, "learning_rate": 7.68328474916159e-06, "loss": 0.3565, "step": 3495 }, { "epoch": 0.3402433090024331, "grad_norm": 1.5283573621457682, "learning_rate": 7.681954655239703e-06, "loss": 0.4789, "step": 3496 }, { "epoch": 0.3403406326034063, "grad_norm": 1.4764556038728422, "learning_rate": 7.680624294810335e-06, "loss": 0.4079, "step": 3497 }, { "epoch": 0.34043795620437955, "grad_norm": 1.5139159959394148, "learning_rate": 7.679293668005683e-06, "loss": 0.5341, "step": 3498 }, { "epoch": 0.3405352798053528, "grad_norm": 1.5537302722123498, "learning_rate": 7.677962774957971e-06, "loss": 0.478, "step": 3499 }, { "epoch": 0.340632603406326, "grad_norm": 1.5827415849591213, "learning_rate": 7.676631615799453e-06, "loss": 0.359, "step": 3500 }, { "epoch": 0.34072992700729926, "grad_norm": 1.8389434879657838, "learning_rate": 7.675300190662406e-06, "loss": 0.3688, "step": 3501 }, { "epoch": 0.3408272506082725, "grad_norm": 1.219949474382852, "learning_rate": 7.673968499679134e-06, "loss": 0.3099, "step": 3502 }, { "epoch": 0.34092457420924577, "grad_norm": 1.5256977236182008, "learning_rate": 7.67263654298197e-06, "loss": 0.3838, "step": 3503 }, { "epoch": 0.34102189781021897, "grad_norm": 1.4659409139833723, "learning_rate": 7.671304320703269e-06, "loss": 0.4845, "step": 3504 }, { "epoch": 0.3411192214111922, "grad_norm": 1.667843387368496, "learning_rate": 7.669971832975417e-06, "loss": 0.5876, "step": 3505 }, { "epoch": 0.3412165450121655, "grad_norm": 1.346414364244355, "learning_rate": 7.668639079930821e-06, "loss": 0.4337, "step": 3506 }, { "epoch": 0.34131386861313867, "grad_norm": 2.022273962642171, "learning_rate": 7.66730606170192e-06, "loss": 0.472, "step": 3507 }, { "epoch": 0.3414111922141119, "grad_norm": 1.4021904736753643, "learning_rate": 7.665972778421175e-06, "loss": 0.3331, "step": 3508 }, { "epoch": 0.3415085158150852, "grad_norm": 1.3964469851310124, "learning_rate": 7.664639230221081e-06, "loss": 0.4151, "step": 3509 }, { "epoch": 0.3416058394160584, "grad_norm": 1.437167087649688, "learning_rate": 7.663305417234146e-06, "loss": 0.3751, "step": 3510 }, { "epoch": 0.34170316301703163, "grad_norm": 1.3813486918408102, "learning_rate": 7.661971339592913e-06, "loss": 0.2492, "step": 3511 }, { "epoch": 0.3418004866180049, "grad_norm": 2.213050506553265, "learning_rate": 7.660636997429953e-06, "loss": 0.2442, "step": 3512 }, { "epoch": 0.3418978102189781, "grad_norm": 1.4125007730667565, "learning_rate": 7.659302390877858e-06, "loss": 0.3901, "step": 3513 }, { "epoch": 0.34199513381995134, "grad_norm": 1.5262547193449572, "learning_rate": 7.657967520069253e-06, "loss": 0.5142, "step": 3514 }, { "epoch": 0.3420924574209246, "grad_norm": 1.419026298338398, "learning_rate": 7.65663238513678e-06, "loss": 0.4604, "step": 3515 }, { "epoch": 0.3421897810218978, "grad_norm": 1.354197849012477, "learning_rate": 7.655296986213114e-06, "loss": 0.3741, "step": 3516 }, { "epoch": 0.34228710462287104, "grad_norm": 1.3675444218814583, "learning_rate": 7.653961323430954e-06, "loss": 0.4636, "step": 3517 }, { "epoch": 0.3423844282238443, "grad_norm": 1.6737094690203054, "learning_rate": 7.652625396923027e-06, "loss": 0.5368, "step": 3518 }, { "epoch": 0.3424817518248175, "grad_norm": 1.5346015529843957, "learning_rate": 7.651289206822084e-06, "loss": 0.5808, "step": 3519 }, { "epoch": 0.34257907542579075, "grad_norm": 1.7677803050375525, "learning_rate": 7.649952753260901e-06, "loss": 0.4331, "step": 3520 }, { "epoch": 0.342676399026764, "grad_norm": 1.5130877149006923, "learning_rate": 7.648616036372288e-06, "loss": 0.4343, "step": 3521 }, { "epoch": 0.34277372262773725, "grad_norm": 1.6505485143132894, "learning_rate": 7.647279056289068e-06, "loss": 0.4256, "step": 3522 }, { "epoch": 0.34287104622871045, "grad_norm": 1.1213144756308453, "learning_rate": 7.6459418131441e-06, "loss": 0.248, "step": 3523 }, { "epoch": 0.3429683698296837, "grad_norm": 1.4978011226878554, "learning_rate": 7.64460430707027e-06, "loss": 0.4457, "step": 3524 }, { "epoch": 0.34306569343065696, "grad_norm": 1.724927585574558, "learning_rate": 7.643266538200485e-06, "loss": 0.5746, "step": 3525 }, { "epoch": 0.34316301703163016, "grad_norm": 1.4218186587395187, "learning_rate": 7.641928506667677e-06, "loss": 0.4628, "step": 3526 }, { "epoch": 0.3432603406326034, "grad_norm": 1.1979163238967183, "learning_rate": 7.640590212604813e-06, "loss": 0.3276, "step": 3527 }, { "epoch": 0.34335766423357666, "grad_norm": 1.5459257353579257, "learning_rate": 7.639251656144873e-06, "loss": 0.5543, "step": 3528 }, { "epoch": 0.34345498783454986, "grad_norm": 2.168708523840157, "learning_rate": 7.637912837420876e-06, "loss": 0.4451, "step": 3529 }, { "epoch": 0.3435523114355231, "grad_norm": 1.3295572972455665, "learning_rate": 7.63657375656586e-06, "loss": 0.3659, "step": 3530 }, { "epoch": 0.34364963503649637, "grad_norm": 1.3940981237720802, "learning_rate": 7.635234413712886e-06, "loss": 0.3305, "step": 3531 }, { "epoch": 0.34374695863746957, "grad_norm": 1.668696796269181, "learning_rate": 7.63389480899505e-06, "loss": 0.262, "step": 3532 }, { "epoch": 0.3438442822384428, "grad_norm": 1.3235762846506705, "learning_rate": 7.632554942545468e-06, "loss": 0.354, "step": 3533 }, { "epoch": 0.3439416058394161, "grad_norm": 1.774595647074867, "learning_rate": 7.631214814497283e-06, "loss": 0.4181, "step": 3534 }, { "epoch": 0.3440389294403893, "grad_norm": 1.3699960689070612, "learning_rate": 7.629874424983664e-06, "loss": 0.4893, "step": 3535 }, { "epoch": 0.3441362530413625, "grad_norm": 1.0054517790481798, "learning_rate": 7.628533774137809e-06, "loss": 0.2678, "step": 3536 }, { "epoch": 0.3442335766423358, "grad_norm": 1.2585804176689974, "learning_rate": 7.627192862092936e-06, "loss": 0.3145, "step": 3537 }, { "epoch": 0.344330900243309, "grad_norm": 1.4486458769434574, "learning_rate": 7.625851688982293e-06, "loss": 0.5018, "step": 3538 }, { "epoch": 0.34442822384428223, "grad_norm": 1.6761010493950546, "learning_rate": 7.624510254939155e-06, "loss": 0.5786, "step": 3539 }, { "epoch": 0.3445255474452555, "grad_norm": 1.1326870552848924, "learning_rate": 7.623168560096819e-06, "loss": 0.2714, "step": 3540 }, { "epoch": 0.3446228710462287, "grad_norm": 1.380514271735255, "learning_rate": 7.62182660458861e-06, "loss": 0.4435, "step": 3541 }, { "epoch": 0.34472019464720194, "grad_norm": 1.3847938610044406, "learning_rate": 7.620484388547881e-06, "loss": 0.456, "step": 3542 }, { "epoch": 0.3448175182481752, "grad_norm": 1.4774572497281164, "learning_rate": 7.619141912108008e-06, "loss": 0.4016, "step": 3543 }, { "epoch": 0.34491484184914845, "grad_norm": 1.4155087320104913, "learning_rate": 7.617799175402392e-06, "loss": 0.4672, "step": 3544 }, { "epoch": 0.34501216545012164, "grad_norm": 1.2347049030843364, "learning_rate": 7.616456178564463e-06, "loss": 0.4701, "step": 3545 }, { "epoch": 0.3451094890510949, "grad_norm": 1.3617965352115597, "learning_rate": 7.615112921727677e-06, "loss": 0.4411, "step": 3546 }, { "epoch": 0.34520681265206815, "grad_norm": 1.5080403368283026, "learning_rate": 7.613769405025511e-06, "loss": 0.446, "step": 3547 }, { "epoch": 0.34530413625304135, "grad_norm": 1.3587460871748047, "learning_rate": 7.612425628591473e-06, "loss": 0.4618, "step": 3548 }, { "epoch": 0.3454014598540146, "grad_norm": 1.5594894397765757, "learning_rate": 7.611081592559095e-06, "loss": 0.6454, "step": 3549 }, { "epoch": 0.34549878345498786, "grad_norm": 1.5472776871393785, "learning_rate": 7.609737297061934e-06, "loss": 0.4209, "step": 3550 }, { "epoch": 0.34559610705596105, "grad_norm": 1.1387925779252865, "learning_rate": 7.608392742233573e-06, "loss": 0.2542, "step": 3551 }, { "epoch": 0.3456934306569343, "grad_norm": 2.1308610533262753, "learning_rate": 7.6070479282076226e-06, "loss": 0.4232, "step": 3552 }, { "epoch": 0.34579075425790756, "grad_norm": 1.655772230226882, "learning_rate": 7.605702855117717e-06, "loss": 0.316, "step": 3553 }, { "epoch": 0.34588807785888076, "grad_norm": 1.2773133253854652, "learning_rate": 7.604357523097518e-06, "loss": 0.3933, "step": 3554 }, { "epoch": 0.345985401459854, "grad_norm": 1.5157476945746606, "learning_rate": 7.6030119322807105e-06, "loss": 0.4895, "step": 3555 }, { "epoch": 0.34608272506082727, "grad_norm": 1.5384836415390328, "learning_rate": 7.601666082801007e-06, "loss": 0.4571, "step": 3556 }, { "epoch": 0.34618004866180047, "grad_norm": 1.3494696317056387, "learning_rate": 7.600319974792145e-06, "loss": 0.3585, "step": 3557 }, { "epoch": 0.3462773722627737, "grad_norm": 1.5639692646479821, "learning_rate": 7.59897360838789e-06, "loss": 0.4913, "step": 3558 }, { "epoch": 0.346374695863747, "grad_norm": 1.5771201683321747, "learning_rate": 7.59762698372203e-06, "loss": 0.644, "step": 3559 }, { "epoch": 0.34647201946472017, "grad_norm": 1.445755844165086, "learning_rate": 7.596280100928379e-06, "loss": 0.4662, "step": 3560 }, { "epoch": 0.3465693430656934, "grad_norm": 1.0169653044814775, "learning_rate": 7.59493296014078e-06, "loss": 0.1873, "step": 3561 }, { "epoch": 0.3466666666666667, "grad_norm": 1.3573861316393436, "learning_rate": 7.593585561493098e-06, "loss": 0.3621, "step": 3562 }, { "epoch": 0.3467639902676399, "grad_norm": 1.3679634725925294, "learning_rate": 7.592237905119224e-06, "loss": 0.3714, "step": 3563 }, { "epoch": 0.34686131386861313, "grad_norm": 1.6153262572349345, "learning_rate": 7.590889991153076e-06, "loss": 0.1934, "step": 3564 }, { "epoch": 0.3469586374695864, "grad_norm": 1.3501706757209104, "learning_rate": 7.589541819728597e-06, "loss": 0.3771, "step": 3565 }, { "epoch": 0.34705596107055964, "grad_norm": 1.4359834383485004, "learning_rate": 7.588193390979756e-06, "loss": 0.4021, "step": 3566 }, { "epoch": 0.34715328467153284, "grad_norm": 1.6178918309073458, "learning_rate": 7.5868447050405456e-06, "loss": 0.5326, "step": 3567 }, { "epoch": 0.3472506082725061, "grad_norm": 1.2161304982080974, "learning_rate": 7.585495762044989e-06, "loss": 0.3215, "step": 3568 }, { "epoch": 0.34734793187347934, "grad_norm": 1.2514780842366786, "learning_rate": 7.584146562127128e-06, "loss": 0.2619, "step": 3569 }, { "epoch": 0.34744525547445254, "grad_norm": 1.292347877079837, "learning_rate": 7.5827971054210334e-06, "loss": 0.3722, "step": 3570 }, { "epoch": 0.3475425790754258, "grad_norm": 1.428500991174703, "learning_rate": 7.581447392060806e-06, "loss": 0.4681, "step": 3571 }, { "epoch": 0.34763990267639905, "grad_norm": 1.6187169310973553, "learning_rate": 7.5800974221805635e-06, "loss": 0.3123, "step": 3572 }, { "epoch": 0.34773722627737225, "grad_norm": 1.2828962403261588, "learning_rate": 7.5787471959144535e-06, "loss": 0.3426, "step": 3573 }, { "epoch": 0.3478345498783455, "grad_norm": 1.3424794600068044, "learning_rate": 7.577396713396649e-06, "loss": 0.2749, "step": 3574 }, { "epoch": 0.34793187347931875, "grad_norm": 1.311429995391481, "learning_rate": 7.576045974761352e-06, "loss": 0.1912, "step": 3575 }, { "epoch": 0.34802919708029195, "grad_norm": 1.9475934853598964, "learning_rate": 7.57469498014278e-06, "loss": 0.3568, "step": 3576 }, { "epoch": 0.3481265206812652, "grad_norm": 1.5712590946732445, "learning_rate": 7.573343729675187e-06, "loss": 0.4361, "step": 3577 }, { "epoch": 0.34822384428223846, "grad_norm": 1.3460143771601136, "learning_rate": 7.5719922234928435e-06, "loss": 0.3258, "step": 3578 }, { "epoch": 0.34832116788321166, "grad_norm": 1.3457032135851295, "learning_rate": 7.5706404617300544e-06, "loss": 0.2679, "step": 3579 }, { "epoch": 0.3484184914841849, "grad_norm": 1.602637400100207, "learning_rate": 7.569288444521141e-06, "loss": 0.4349, "step": 3580 }, { "epoch": 0.34851581508515816, "grad_norm": 1.193898879748489, "learning_rate": 7.567936172000456e-06, "loss": 0.3823, "step": 3581 }, { "epoch": 0.34861313868613136, "grad_norm": 1.7213934943419156, "learning_rate": 7.5665836443023764e-06, "loss": 0.4477, "step": 3582 }, { "epoch": 0.3487104622871046, "grad_norm": 2.0896889317289262, "learning_rate": 7.5652308615613025e-06, "loss": 0.548, "step": 3583 }, { "epoch": 0.34880778588807787, "grad_norm": 1.2133584709672542, "learning_rate": 7.563877823911661e-06, "loss": 0.3342, "step": 3584 }, { "epoch": 0.34890510948905107, "grad_norm": 1.3518803834565474, "learning_rate": 7.562524531487902e-06, "loss": 0.4428, "step": 3585 }, { "epoch": 0.3490024330900243, "grad_norm": 1.5646028482749865, "learning_rate": 7.561170984424509e-06, "loss": 0.4805, "step": 3586 }, { "epoch": 0.3490997566909976, "grad_norm": 1.725109749219042, "learning_rate": 7.5598171828559775e-06, "loss": 0.3953, "step": 3587 }, { "epoch": 0.34919708029197083, "grad_norm": 1.6751385503374157, "learning_rate": 7.558463126916842e-06, "loss": 0.3466, "step": 3588 }, { "epoch": 0.349294403892944, "grad_norm": 1.577501457080682, "learning_rate": 7.557108816741651e-06, "loss": 0.387, "step": 3589 }, { "epoch": 0.3493917274939173, "grad_norm": 1.5319961753926956, "learning_rate": 7.5557542524649866e-06, "loss": 0.2916, "step": 3590 }, { "epoch": 0.34948905109489053, "grad_norm": 2.5209551905243845, "learning_rate": 7.554399434221449e-06, "loss": 0.2941, "step": 3591 }, { "epoch": 0.34958637469586373, "grad_norm": 1.1980216546975295, "learning_rate": 7.553044362145672e-06, "loss": 0.3867, "step": 3592 }, { "epoch": 0.349683698296837, "grad_norm": 1.425370917650529, "learning_rate": 7.551689036372306e-06, "loss": 0.4788, "step": 3593 }, { "epoch": 0.34978102189781024, "grad_norm": 1.5504040775883203, "learning_rate": 7.550333457036032e-06, "loss": 0.5355, "step": 3594 }, { "epoch": 0.34987834549878344, "grad_norm": 1.253372887044746, "learning_rate": 7.5489776242715564e-06, "loss": 0.4783, "step": 3595 }, { "epoch": 0.3499756690997567, "grad_norm": 1.4985868632126829, "learning_rate": 7.547621538213607e-06, "loss": 0.4225, "step": 3596 }, { "epoch": 0.35007299270072995, "grad_norm": 1.4435970539146459, "learning_rate": 7.5462651989969385e-06, "loss": 0.3745, "step": 3597 }, { "epoch": 0.35017031630170314, "grad_norm": 1.3981266208958172, "learning_rate": 7.5449086067563314e-06, "loss": 0.3456, "step": 3598 }, { "epoch": 0.3502676399026764, "grad_norm": 1.8315759023798575, "learning_rate": 7.543551761626594e-06, "loss": 0.5542, "step": 3599 }, { "epoch": 0.35036496350364965, "grad_norm": 1.0610077420382762, "learning_rate": 7.542194663742553e-06, "loss": 0.3009, "step": 3600 }, { "epoch": 0.35046228710462285, "grad_norm": 1.4599599576710758, "learning_rate": 7.5408373132390674e-06, "loss": 0.3322, "step": 3601 }, { "epoch": 0.3505596107055961, "grad_norm": 1.357290752856556, "learning_rate": 7.539479710251014e-06, "loss": 0.4219, "step": 3602 }, { "epoch": 0.35065693430656936, "grad_norm": 1.3466674715612543, "learning_rate": 7.538121854913303e-06, "loss": 0.4688, "step": 3603 }, { "epoch": 0.35075425790754255, "grad_norm": 1.9207221814789595, "learning_rate": 7.536763747360863e-06, "loss": 0.5269, "step": 3604 }, { "epoch": 0.3508515815085158, "grad_norm": 1.4616085693980927, "learning_rate": 7.535405387728649e-06, "loss": 0.5216, "step": 3605 }, { "epoch": 0.35094890510948906, "grad_norm": 1.638466859707607, "learning_rate": 7.534046776151645e-06, "loss": 0.4155, "step": 3606 }, { "epoch": 0.35104622871046226, "grad_norm": 1.8610570015640786, "learning_rate": 7.532687912764853e-06, "loss": 0.4385, "step": 3607 }, { "epoch": 0.3511435523114355, "grad_norm": 1.390595082164788, "learning_rate": 7.531328797703308e-06, "loss": 0.4791, "step": 3608 }, { "epoch": 0.35124087591240877, "grad_norm": 1.4107542592778146, "learning_rate": 7.529969431102063e-06, "loss": 0.5517, "step": 3609 }, { "epoch": 0.351338199513382, "grad_norm": 1.2873358057005926, "learning_rate": 7.528609813096203e-06, "loss": 0.2964, "step": 3610 }, { "epoch": 0.3514355231143552, "grad_norm": 1.585587787123838, "learning_rate": 7.527249943820831e-06, "loss": 0.5375, "step": 3611 }, { "epoch": 0.3515328467153285, "grad_norm": 1.307663133994867, "learning_rate": 7.525889823411076e-06, "loss": 0.2655, "step": 3612 }, { "epoch": 0.3516301703163017, "grad_norm": 1.3238605926395333, "learning_rate": 7.524529452002099e-06, "loss": 0.3678, "step": 3613 }, { "epoch": 0.3517274939172749, "grad_norm": 1.1603226975769314, "learning_rate": 7.523168829729078e-06, "loss": 0.294, "step": 3614 }, { "epoch": 0.3518248175182482, "grad_norm": 0.9756816706305802, "learning_rate": 7.52180795672722e-06, "loss": 0.1748, "step": 3615 }, { "epoch": 0.35192214111922143, "grad_norm": 1.6381757346588681, "learning_rate": 7.520446833131756e-06, "loss": 0.3671, "step": 3616 }, { "epoch": 0.35201946472019463, "grad_norm": 1.6012410655431273, "learning_rate": 7.51908545907794e-06, "loss": 0.7845, "step": 3617 }, { "epoch": 0.3521167883211679, "grad_norm": 1.513216444842459, "learning_rate": 7.517723834701054e-06, "loss": 0.5675, "step": 3618 }, { "epoch": 0.35221411192214114, "grad_norm": 1.5876840830126697, "learning_rate": 7.516361960136403e-06, "loss": 0.4117, "step": 3619 }, { "epoch": 0.35231143552311434, "grad_norm": 1.2568334714306615, "learning_rate": 7.514999835519318e-06, "loss": 0.3623, "step": 3620 }, { "epoch": 0.3524087591240876, "grad_norm": 1.430201120305146, "learning_rate": 7.513637460985153e-06, "loss": 0.4618, "step": 3621 }, { "epoch": 0.35250608272506084, "grad_norm": 1.6954496126059004, "learning_rate": 7.512274836669288e-06, "loss": 0.4018, "step": 3622 }, { "epoch": 0.35260340632603404, "grad_norm": 1.3717482871101743, "learning_rate": 7.510911962707128e-06, "loss": 0.5364, "step": 3623 }, { "epoch": 0.3527007299270073, "grad_norm": 1.3632873709904672, "learning_rate": 7.509548839234102e-06, "loss": 0.3786, "step": 3624 }, { "epoch": 0.35279805352798055, "grad_norm": 1.5766225782578598, "learning_rate": 7.508185466385667e-06, "loss": 0.6176, "step": 3625 }, { "epoch": 0.35289537712895375, "grad_norm": 1.6300483194455977, "learning_rate": 7.506821844297301e-06, "loss": 0.4239, "step": 3626 }, { "epoch": 0.352992700729927, "grad_norm": 1.244455438425305, "learning_rate": 7.505457973104506e-06, "loss": 0.3627, "step": 3627 }, { "epoch": 0.35309002433090025, "grad_norm": 1.441382927434537, "learning_rate": 7.504093852942815e-06, "loss": 0.3853, "step": 3628 }, { "epoch": 0.35318734793187345, "grad_norm": 1.3700542367515711, "learning_rate": 7.502729483947776e-06, "loss": 0.5312, "step": 3629 }, { "epoch": 0.3532846715328467, "grad_norm": 1.3101033627157612, "learning_rate": 7.50136486625497e-06, "loss": 0.3997, "step": 3630 }, { "epoch": 0.35338199513381996, "grad_norm": 1.4700150418971674, "learning_rate": 7.500000000000001e-06, "loss": 0.4536, "step": 3631 }, { "epoch": 0.3534793187347932, "grad_norm": 1.4399856969334963, "learning_rate": 7.4986348853184944e-06, "loss": 0.4301, "step": 3632 }, { "epoch": 0.3535766423357664, "grad_norm": 1.5002693093456838, "learning_rate": 7.497269522346105e-06, "loss": 0.5339, "step": 3633 }, { "epoch": 0.35367396593673966, "grad_norm": 1.4332148495576913, "learning_rate": 7.4959039112185065e-06, "loss": 0.4218, "step": 3634 }, { "epoch": 0.3537712895377129, "grad_norm": 1.430533403803271, "learning_rate": 7.494538052071403e-06, "loss": 0.4658, "step": 3635 }, { "epoch": 0.3538686131386861, "grad_norm": 1.7972458166545215, "learning_rate": 7.4931719450405185e-06, "loss": 0.2642, "step": 3636 }, { "epoch": 0.35396593673965937, "grad_norm": 1.138544720049106, "learning_rate": 7.491805590261607e-06, "loss": 0.3429, "step": 3637 }, { "epoch": 0.3540632603406326, "grad_norm": 1.9786831181417208, "learning_rate": 7.490438987870443e-06, "loss": 0.4378, "step": 3638 }, { "epoch": 0.3541605839416058, "grad_norm": 1.136048581491554, "learning_rate": 7.489072138002825e-06, "loss": 0.2668, "step": 3639 }, { "epoch": 0.3542579075425791, "grad_norm": 1.3441611259868125, "learning_rate": 7.4877050407945796e-06, "loss": 0.468, "step": 3640 }, { "epoch": 0.35435523114355233, "grad_norm": 1.1560682205410908, "learning_rate": 7.486337696381554e-06, "loss": 0.3363, "step": 3641 }, { "epoch": 0.3544525547445255, "grad_norm": 1.0393622403442615, "learning_rate": 7.484970104899624e-06, "loss": 0.2803, "step": 3642 }, { "epoch": 0.3545498783454988, "grad_norm": 1.514160943546232, "learning_rate": 7.483602266484686e-06, "loss": 0.4441, "step": 3643 }, { "epoch": 0.35464720194647203, "grad_norm": 1.580897624441714, "learning_rate": 7.482234181272666e-06, "loss": 0.3502, "step": 3644 }, { "epoch": 0.35474452554744523, "grad_norm": 1.2716040020289627, "learning_rate": 7.480865849399508e-06, "loss": 0.3551, "step": 3645 }, { "epoch": 0.3548418491484185, "grad_norm": 1.6397132883430414, "learning_rate": 7.4794972710011885e-06, "loss": 0.54, "step": 3646 }, { "epoch": 0.35493917274939174, "grad_norm": 1.4383012777464312, "learning_rate": 7.478128446213699e-06, "loss": 0.3954, "step": 3647 }, { "epoch": 0.35503649635036494, "grad_norm": 1.4020794377643642, "learning_rate": 7.476759375173063e-06, "loss": 0.2869, "step": 3648 }, { "epoch": 0.3551338199513382, "grad_norm": 1.420695895411819, "learning_rate": 7.475390058015326e-06, "loss": 0.4162, "step": 3649 }, { "epoch": 0.35523114355231145, "grad_norm": 1.3856407596171976, "learning_rate": 7.474020494876556e-06, "loss": 0.403, "step": 3650 }, { "epoch": 0.35532846715328464, "grad_norm": 1.6355333792797258, "learning_rate": 7.472650685892851e-06, "loss": 0.6147, "step": 3651 }, { "epoch": 0.3554257907542579, "grad_norm": 1.3284128213805295, "learning_rate": 7.471280631200325e-06, "loss": 0.2128, "step": 3652 }, { "epoch": 0.35552311435523115, "grad_norm": 1.357840581851552, "learning_rate": 7.469910330935126e-06, "loss": 0.3483, "step": 3653 }, { "epoch": 0.3556204379562044, "grad_norm": 1.2770810104938104, "learning_rate": 7.468539785233417e-06, "loss": 0.2812, "step": 3654 }, { "epoch": 0.3557177615571776, "grad_norm": 1.7995385373189825, "learning_rate": 7.467168994231394e-06, "loss": 0.2944, "step": 3655 }, { "epoch": 0.35581508515815086, "grad_norm": 2.0088307214551846, "learning_rate": 7.465797958065272e-06, "loss": 0.3204, "step": 3656 }, { "epoch": 0.3559124087591241, "grad_norm": 1.3573976363618303, "learning_rate": 7.46442667687129e-06, "loss": 0.3666, "step": 3657 }, { "epoch": 0.3560097323600973, "grad_norm": 1.4772002442457595, "learning_rate": 7.463055150785715e-06, "loss": 0.3756, "step": 3658 }, { "epoch": 0.35610705596107056, "grad_norm": 1.7169887882459811, "learning_rate": 7.461683379944835e-06, "loss": 0.6085, "step": 3659 }, { "epoch": 0.3562043795620438, "grad_norm": 1.753449718301559, "learning_rate": 7.460311364484964e-06, "loss": 0.691, "step": 3660 }, { "epoch": 0.356301703163017, "grad_norm": 1.9142906208727815, "learning_rate": 7.458939104542442e-06, "loss": 0.5569, "step": 3661 }, { "epoch": 0.35639902676399027, "grad_norm": 1.564692980111203, "learning_rate": 7.457566600253631e-06, "loss": 0.417, "step": 3662 }, { "epoch": 0.3564963503649635, "grad_norm": 1.4241868392709103, "learning_rate": 7.4561938517549136e-06, "loss": 0.3702, "step": 3663 }, { "epoch": 0.3565936739659367, "grad_norm": 1.0774182753758312, "learning_rate": 7.4548208591827056e-06, "loss": 0.325, "step": 3664 }, { "epoch": 0.35669099756691, "grad_norm": 1.3486569434899873, "learning_rate": 7.4534476226734384e-06, "loss": 0.3906, "step": 3665 }, { "epoch": 0.3567883211678832, "grad_norm": 1.2303476425395732, "learning_rate": 7.452074142363573e-06, "loss": 0.3073, "step": 3666 }, { "epoch": 0.3568856447688564, "grad_norm": 1.40711240815019, "learning_rate": 7.450700418389594e-06, "loss": 0.386, "step": 3667 }, { "epoch": 0.3569829683698297, "grad_norm": 1.4572695621598242, "learning_rate": 7.449326450888007e-06, "loss": 0.5228, "step": 3668 }, { "epoch": 0.35708029197080293, "grad_norm": 1.4210722617044638, "learning_rate": 7.4479522399953465e-06, "loss": 0.4409, "step": 3669 }, { "epoch": 0.35717761557177613, "grad_norm": 1.508447701068191, "learning_rate": 7.446577785848166e-06, "loss": 0.4571, "step": 3670 }, { "epoch": 0.3572749391727494, "grad_norm": 1.3197481653540741, "learning_rate": 7.445203088583047e-06, "loss": 0.3886, "step": 3671 }, { "epoch": 0.35737226277372264, "grad_norm": 1.7619697747209029, "learning_rate": 7.443828148336594e-06, "loss": 0.5652, "step": 3672 }, { "epoch": 0.3574695863746959, "grad_norm": 1.2741586714286437, "learning_rate": 7.442452965245437e-06, "loss": 0.2068, "step": 3673 }, { "epoch": 0.3575669099756691, "grad_norm": 1.6171437133168665, "learning_rate": 7.4410775394462285e-06, "loss": 0.4785, "step": 3674 }, { "epoch": 0.35766423357664234, "grad_norm": 1.3575662879144605, "learning_rate": 7.4397018710756415e-06, "loss": 0.3851, "step": 3675 }, { "epoch": 0.3577615571776156, "grad_norm": 1.5452516302727513, "learning_rate": 7.438325960270382e-06, "loss": 0.4154, "step": 3676 }, { "epoch": 0.3578588807785888, "grad_norm": 1.4057841059024094, "learning_rate": 7.436949807167172e-06, "loss": 0.4309, "step": 3677 }, { "epoch": 0.35795620437956205, "grad_norm": 1.3402694159440718, "learning_rate": 7.435573411902763e-06, "loss": 0.3905, "step": 3678 }, { "epoch": 0.3580535279805353, "grad_norm": 1.899972993257223, "learning_rate": 7.434196774613926e-06, "loss": 0.324, "step": 3679 }, { "epoch": 0.3581508515815085, "grad_norm": 1.517104694845036, "learning_rate": 7.432819895437461e-06, "loss": 0.5038, "step": 3680 }, { "epoch": 0.35824817518248175, "grad_norm": 1.5026505417391594, "learning_rate": 7.431442774510186e-06, "loss": 0.5613, "step": 3681 }, { "epoch": 0.358345498783455, "grad_norm": 1.6998698072507739, "learning_rate": 7.4300654119689475e-06, "loss": 0.4362, "step": 3682 }, { "epoch": 0.3584428223844282, "grad_norm": 1.5499969728501817, "learning_rate": 7.4286878079506175e-06, "loss": 0.5288, "step": 3683 }, { "epoch": 0.35854014598540146, "grad_norm": 1.7237862062144214, "learning_rate": 7.4273099625920866e-06, "loss": 0.2981, "step": 3684 }, { "epoch": 0.3586374695863747, "grad_norm": 1.5399796130329546, "learning_rate": 7.4259318760302725e-06, "loss": 0.3486, "step": 3685 }, { "epoch": 0.3587347931873479, "grad_norm": 1.734733819358604, "learning_rate": 7.424553548402116e-06, "loss": 0.4681, "step": 3686 }, { "epoch": 0.35883211678832116, "grad_norm": 1.4858189837407711, "learning_rate": 7.423174979844583e-06, "loss": 0.4624, "step": 3687 }, { "epoch": 0.3589294403892944, "grad_norm": 2.0317094723346556, "learning_rate": 7.421796170494664e-06, "loss": 0.421, "step": 3688 }, { "epoch": 0.3590267639902676, "grad_norm": 1.6561811900690029, "learning_rate": 7.42041712048937e-06, "loss": 0.4593, "step": 3689 }, { "epoch": 0.35912408759124087, "grad_norm": 1.3298486632098465, "learning_rate": 7.41903782996574e-06, "loss": 0.3328, "step": 3690 }, { "epoch": 0.3592214111922141, "grad_norm": 1.6636689486346872, "learning_rate": 7.417658299060834e-06, "loss": 0.4999, "step": 3691 }, { "epoch": 0.3593187347931873, "grad_norm": 1.566657143760038, "learning_rate": 7.4162785279117354e-06, "loss": 0.6945, "step": 3692 }, { "epoch": 0.3594160583941606, "grad_norm": 1.1976745803136968, "learning_rate": 7.414898516655555e-06, "loss": 0.3368, "step": 3693 }, { "epoch": 0.35951338199513383, "grad_norm": 1.5599198973704254, "learning_rate": 7.413518265429427e-06, "loss": 0.3875, "step": 3694 }, { "epoch": 0.3596107055961071, "grad_norm": 1.45764719852823, "learning_rate": 7.412137774370502e-06, "loss": 0.4665, "step": 3695 }, { "epoch": 0.3597080291970803, "grad_norm": 1.2276949665906944, "learning_rate": 7.410757043615966e-06, "loss": 0.285, "step": 3696 }, { "epoch": 0.35980535279805353, "grad_norm": 1.5359072801599356, "learning_rate": 7.40937607330302e-06, "loss": 0.376, "step": 3697 }, { "epoch": 0.3599026763990268, "grad_norm": 1.209800432472448, "learning_rate": 7.4079948635688925e-06, "loss": 0.3298, "step": 3698 }, { "epoch": 0.36, "grad_norm": 1.4892209107876908, "learning_rate": 7.406613414550835e-06, "loss": 0.3474, "step": 3699 }, { "epoch": 0.36009732360097324, "grad_norm": 1.4539822746623663, "learning_rate": 7.405231726386124e-06, "loss": 0.3756, "step": 3700 }, { "epoch": 0.3601946472019465, "grad_norm": 1.4980607212426436, "learning_rate": 7.403849799212057e-06, "loss": 0.4841, "step": 3701 }, { "epoch": 0.3602919708029197, "grad_norm": 1.3698188527628243, "learning_rate": 7.40246763316596e-06, "loss": 0.3881, "step": 3702 }, { "epoch": 0.36038929440389295, "grad_norm": 1.3253201655674622, "learning_rate": 7.401085228385177e-06, "loss": 0.3933, "step": 3703 }, { "epoch": 0.3604866180048662, "grad_norm": 1.4396239636102728, "learning_rate": 7.399702585007077e-06, "loss": 0.408, "step": 3704 }, { "epoch": 0.3605839416058394, "grad_norm": 1.5453004626353954, "learning_rate": 7.398319703169058e-06, "loss": 0.5276, "step": 3705 }, { "epoch": 0.36068126520681265, "grad_norm": 1.403106821872738, "learning_rate": 7.396936583008535e-06, "loss": 0.4362, "step": 3706 }, { "epoch": 0.3607785888077859, "grad_norm": 1.6766339602355043, "learning_rate": 7.395553224662952e-06, "loss": 0.5511, "step": 3707 }, { "epoch": 0.3608759124087591, "grad_norm": 1.6187272412677542, "learning_rate": 7.394169628269771e-06, "loss": 0.455, "step": 3708 }, { "epoch": 0.36097323600973236, "grad_norm": 1.2227561836718068, "learning_rate": 7.392785793966483e-06, "loss": 0.3885, "step": 3709 }, { "epoch": 0.3610705596107056, "grad_norm": 1.3724156281155415, "learning_rate": 7.391401721890599e-06, "loss": 0.2664, "step": 3710 }, { "epoch": 0.3611678832116788, "grad_norm": 1.2230528654367878, "learning_rate": 7.390017412179658e-06, "loss": 0.3302, "step": 3711 }, { "epoch": 0.36126520681265206, "grad_norm": 1.1928082331280525, "learning_rate": 7.388632864971217e-06, "loss": 0.2629, "step": 3712 }, { "epoch": 0.3613625304136253, "grad_norm": 1.6037454753999223, "learning_rate": 7.38724808040286e-06, "loss": 0.3905, "step": 3713 }, { "epoch": 0.3614598540145985, "grad_norm": 1.3621648463732532, "learning_rate": 7.3858630586121926e-06, "loss": 0.403, "step": 3714 }, { "epoch": 0.36155717761557177, "grad_norm": 1.829925954185779, "learning_rate": 7.384477799736848e-06, "loss": 0.5973, "step": 3715 }, { "epoch": 0.361654501216545, "grad_norm": 1.4268953334113146, "learning_rate": 7.383092303914479e-06, "loss": 0.4476, "step": 3716 }, { "epoch": 0.3617518248175183, "grad_norm": 1.4218010516377573, "learning_rate": 7.381706571282762e-06, "loss": 0.3333, "step": 3717 }, { "epoch": 0.3618491484184915, "grad_norm": 1.4149794161212645, "learning_rate": 7.3803206019794004e-06, "loss": 0.4466, "step": 3718 }, { "epoch": 0.3619464720194647, "grad_norm": 1.3528540289292441, "learning_rate": 7.378934396142116e-06, "loss": 0.4781, "step": 3719 }, { "epoch": 0.362043795620438, "grad_norm": 1.7444608763847531, "learning_rate": 7.3775479539086595e-06, "loss": 0.3911, "step": 3720 }, { "epoch": 0.3621411192214112, "grad_norm": 1.3328673124502177, "learning_rate": 7.376161275416802e-06, "loss": 0.4312, "step": 3721 }, { "epoch": 0.36223844282238443, "grad_norm": 1.5999365764206797, "learning_rate": 7.374774360804337e-06, "loss": 0.4827, "step": 3722 }, { "epoch": 0.3623357664233577, "grad_norm": 1.3911965727851072, "learning_rate": 7.3733872102090846e-06, "loss": 0.3357, "step": 3723 }, { "epoch": 0.3624330900243309, "grad_norm": 2.108972016446877, "learning_rate": 7.371999823768885e-06, "loss": 0.534, "step": 3724 }, { "epoch": 0.36253041362530414, "grad_norm": 1.3184107339533155, "learning_rate": 7.370612201621606e-06, "loss": 0.3319, "step": 3725 }, { "epoch": 0.3626277372262774, "grad_norm": 1.5480195252175315, "learning_rate": 7.369224343905135e-06, "loss": 0.4569, "step": 3726 }, { "epoch": 0.3627250608272506, "grad_norm": 1.5137208598424667, "learning_rate": 7.3678362507573855e-06, "loss": 0.4966, "step": 3727 }, { "epoch": 0.36282238442822384, "grad_norm": 1.9044576254266983, "learning_rate": 7.366447922316292e-06, "loss": 0.3348, "step": 3728 }, { "epoch": 0.3629197080291971, "grad_norm": 1.5245853892303196, "learning_rate": 7.365059358719814e-06, "loss": 0.4253, "step": 3729 }, { "epoch": 0.3630170316301703, "grad_norm": 1.122262909962616, "learning_rate": 7.3636705601059344e-06, "loss": 0.3222, "step": 3730 }, { "epoch": 0.36311435523114355, "grad_norm": 1.8386225119764608, "learning_rate": 7.362281526612657e-06, "loss": 0.2037, "step": 3731 }, { "epoch": 0.3632116788321168, "grad_norm": 1.4527104104207418, "learning_rate": 7.360892258378014e-06, "loss": 0.4542, "step": 3732 }, { "epoch": 0.36330900243309, "grad_norm": 1.2351545455698876, "learning_rate": 7.359502755540054e-06, "loss": 0.3215, "step": 3733 }, { "epoch": 0.36340632603406325, "grad_norm": 1.2618923481187536, "learning_rate": 7.358113018236856e-06, "loss": 0.3527, "step": 3734 }, { "epoch": 0.3635036496350365, "grad_norm": 1.659524308148738, "learning_rate": 7.356723046606519e-06, "loss": 0.4318, "step": 3735 }, { "epoch": 0.3636009732360097, "grad_norm": 1.565399683654683, "learning_rate": 7.355332840787164e-06, "loss": 0.5299, "step": 3736 }, { "epoch": 0.36369829683698296, "grad_norm": 1.6178264737718506, "learning_rate": 7.353942400916936e-06, "loss": 0.397, "step": 3737 }, { "epoch": 0.3637956204379562, "grad_norm": 1.6534303379570627, "learning_rate": 7.352551727134005e-06, "loss": 0.6081, "step": 3738 }, { "epoch": 0.36389294403892947, "grad_norm": 1.247872353830009, "learning_rate": 7.351160819576564e-06, "loss": 0.3425, "step": 3739 }, { "epoch": 0.36399026763990266, "grad_norm": 1.5020180478592695, "learning_rate": 7.349769678382826e-06, "loss": 0.5016, "step": 3740 }, { "epoch": 0.3640875912408759, "grad_norm": 1.5279869032792845, "learning_rate": 7.34837830369103e-06, "loss": 0.3526, "step": 3741 }, { "epoch": 0.36418491484184917, "grad_norm": 1.3094265238017966, "learning_rate": 7.346986695639439e-06, "loss": 0.3748, "step": 3742 }, { "epoch": 0.36428223844282237, "grad_norm": 1.4537907377719694, "learning_rate": 7.34559485436634e-06, "loss": 0.1896, "step": 3743 }, { "epoch": 0.3643795620437956, "grad_norm": 1.329137618047409, "learning_rate": 7.344202780010036e-06, "loss": 0.3121, "step": 3744 }, { "epoch": 0.3644768856447689, "grad_norm": 1.6436280150893918, "learning_rate": 7.342810472708861e-06, "loss": 0.4822, "step": 3745 }, { "epoch": 0.3645742092457421, "grad_norm": 1.3725794380420981, "learning_rate": 7.341417932601169e-06, "loss": 0.3409, "step": 3746 }, { "epoch": 0.36467153284671533, "grad_norm": 1.5052359115379705, "learning_rate": 7.34002515982534e-06, "loss": 0.2377, "step": 3747 }, { "epoch": 0.3647688564476886, "grad_norm": 1.3552921410971608, "learning_rate": 7.3386321545197715e-06, "loss": 0.3572, "step": 3748 }, { "epoch": 0.3648661800486618, "grad_norm": 1.5018874056046176, "learning_rate": 7.337238916822888e-06, "loss": 0.4986, "step": 3749 }, { "epoch": 0.36496350364963503, "grad_norm": 1.8358104905369528, "learning_rate": 7.335845446873137e-06, "loss": 0.3914, "step": 3750 }, { "epoch": 0.3650608272506083, "grad_norm": 1.1168351051108918, "learning_rate": 7.334451744808988e-06, "loss": 0.2733, "step": 3751 }, { "epoch": 0.3651581508515815, "grad_norm": 1.3776079933771972, "learning_rate": 7.333057810768934e-06, "loss": 0.3787, "step": 3752 }, { "epoch": 0.36525547445255474, "grad_norm": 1.341701987297211, "learning_rate": 7.331663644891492e-06, "loss": 0.3567, "step": 3753 }, { "epoch": 0.365352798053528, "grad_norm": 1.5110248875717456, "learning_rate": 7.3302692473152e-06, "loss": 0.345, "step": 3754 }, { "epoch": 0.3654501216545012, "grad_norm": 1.3479707654958055, "learning_rate": 7.328874618178621e-06, "loss": 0.3874, "step": 3755 }, { "epoch": 0.36554744525547445, "grad_norm": 1.4702256277367285, "learning_rate": 7.32747975762034e-06, "loss": 0.2701, "step": 3756 }, { "epoch": 0.3656447688564477, "grad_norm": 1.2848329456259648, "learning_rate": 7.326084665778965e-06, "loss": 0.428, "step": 3757 }, { "epoch": 0.3657420924574209, "grad_norm": 1.3129833784540297, "learning_rate": 7.324689342793125e-06, "loss": 0.3708, "step": 3758 }, { "epoch": 0.36583941605839415, "grad_norm": 1.585613649874231, "learning_rate": 7.323293788801478e-06, "loss": 0.5306, "step": 3759 }, { "epoch": 0.3659367396593674, "grad_norm": 1.829241550219066, "learning_rate": 7.3218980039427e-06, "loss": 0.4737, "step": 3760 }, { "epoch": 0.36603406326034066, "grad_norm": 1.395899680918888, "learning_rate": 7.320501988355488e-06, "loss": 0.4636, "step": 3761 }, { "epoch": 0.36613138686131386, "grad_norm": 1.5046483178350247, "learning_rate": 7.319105742178568e-06, "loss": 0.4155, "step": 3762 }, { "epoch": 0.3662287104622871, "grad_norm": 1.280400622675231, "learning_rate": 7.317709265550685e-06, "loss": 0.3885, "step": 3763 }, { "epoch": 0.36632603406326036, "grad_norm": 1.217266157593417, "learning_rate": 7.316312558610608e-06, "loss": 0.3962, "step": 3764 }, { "epoch": 0.36642335766423356, "grad_norm": 1.263633008120406, "learning_rate": 7.314915621497129e-06, "loss": 0.478, "step": 3765 }, { "epoch": 0.3665206812652068, "grad_norm": 1.3563025380870863, "learning_rate": 7.31351845434906e-06, "loss": 0.4117, "step": 3766 }, { "epoch": 0.36661800486618007, "grad_norm": 1.2737220025097429, "learning_rate": 7.312121057305241e-06, "loss": 0.424, "step": 3767 }, { "epoch": 0.36671532846715327, "grad_norm": 1.291092078272358, "learning_rate": 7.3107234305045324e-06, "loss": 0.334, "step": 3768 }, { "epoch": 0.3668126520681265, "grad_norm": 1.3126844548992553, "learning_rate": 7.309325574085815e-06, "loss": 0.4322, "step": 3769 }, { "epoch": 0.3669099756690998, "grad_norm": 1.1245662064795194, "learning_rate": 7.307927488187997e-06, "loss": 0.2236, "step": 3770 }, { "epoch": 0.367007299270073, "grad_norm": 1.5102285684556136, "learning_rate": 7.306529172950006e-06, "loss": 0.5623, "step": 3771 }, { "epoch": 0.3671046228710462, "grad_norm": 1.685763702060817, "learning_rate": 7.305130628510792e-06, "loss": 0.6844, "step": 3772 }, { "epoch": 0.3672019464720195, "grad_norm": 1.2974033428491907, "learning_rate": 7.30373185500933e-06, "loss": 0.423, "step": 3773 }, { "epoch": 0.3672992700729927, "grad_norm": 1.5850183990084508, "learning_rate": 7.302332852584619e-06, "loss": 0.5261, "step": 3774 }, { "epoch": 0.36739659367396593, "grad_norm": 1.2629993659748047, "learning_rate": 7.3009336213756775e-06, "loss": 0.4166, "step": 3775 }, { "epoch": 0.3674939172749392, "grad_norm": 1.5305199731858852, "learning_rate": 7.299534161521548e-06, "loss": 0.3868, "step": 3776 }, { "epoch": 0.3675912408759124, "grad_norm": 1.3498481506141689, "learning_rate": 7.298134473161293e-06, "loss": 0.4383, "step": 3777 }, { "epoch": 0.36768856447688564, "grad_norm": 1.1775129878791764, "learning_rate": 7.296734556434006e-06, "loss": 0.3227, "step": 3778 }, { "epoch": 0.3677858880778589, "grad_norm": 1.2659702536536892, "learning_rate": 7.295334411478793e-06, "loss": 0.3261, "step": 3779 }, { "epoch": 0.3678832116788321, "grad_norm": 1.29047857012686, "learning_rate": 7.293934038434789e-06, "loss": 0.3675, "step": 3780 }, { "epoch": 0.36798053527980534, "grad_norm": 1.4852568566742426, "learning_rate": 7.292533437441149e-06, "loss": 0.5919, "step": 3781 }, { "epoch": 0.3680778588807786, "grad_norm": 1.6696818575227366, "learning_rate": 7.291132608637053e-06, "loss": 0.574, "step": 3782 }, { "epoch": 0.36817518248175185, "grad_norm": 1.1373482236107322, "learning_rate": 7.289731552161701e-06, "loss": 0.2637, "step": 3783 }, { "epoch": 0.36827250608272505, "grad_norm": 1.3365823182266015, "learning_rate": 7.288330268154318e-06, "loss": 0.3472, "step": 3784 }, { "epoch": 0.3683698296836983, "grad_norm": 1.372948416466753, "learning_rate": 7.286928756754148e-06, "loss": 0.328, "step": 3785 }, { "epoch": 0.36846715328467156, "grad_norm": 1.4321229130137978, "learning_rate": 7.285527018100464e-06, "loss": 0.4256, "step": 3786 }, { "epoch": 0.36856447688564475, "grad_norm": 1.5800422881297964, "learning_rate": 7.284125052332554e-06, "loss": 0.6671, "step": 3787 }, { "epoch": 0.368661800486618, "grad_norm": 1.2921074248553535, "learning_rate": 7.282722859589734e-06, "loss": 0.2557, "step": 3788 }, { "epoch": 0.36875912408759126, "grad_norm": 1.5092560968103714, "learning_rate": 7.281320440011339e-06, "loss": 0.483, "step": 3789 }, { "epoch": 0.36885644768856446, "grad_norm": 1.6123103806541292, "learning_rate": 7.279917793736732e-06, "loss": 0.6551, "step": 3790 }, { "epoch": 0.3689537712895377, "grad_norm": 1.4808246114399204, "learning_rate": 7.278514920905291e-06, "loss": 0.2513, "step": 3791 }, { "epoch": 0.36905109489051097, "grad_norm": 1.5663493234083814, "learning_rate": 7.277111821656423e-06, "loss": 0.571, "step": 3792 }, { "epoch": 0.36914841849148416, "grad_norm": 1.363238577188513, "learning_rate": 7.275708496129552e-06, "loss": 0.4348, "step": 3793 }, { "epoch": 0.3692457420924574, "grad_norm": 1.4565243900455658, "learning_rate": 7.27430494446413e-06, "loss": 0.4819, "step": 3794 }, { "epoch": 0.36934306569343067, "grad_norm": 1.384454947890668, "learning_rate": 7.272901166799628e-06, "loss": 0.3765, "step": 3795 }, { "epoch": 0.36944038929440387, "grad_norm": 1.4124479402383887, "learning_rate": 7.27149716327554e-06, "loss": 0.4979, "step": 3796 }, { "epoch": 0.3695377128953771, "grad_norm": 1.5746058137286074, "learning_rate": 7.270092934031383e-06, "loss": 0.6907, "step": 3797 }, { "epoch": 0.3696350364963504, "grad_norm": 1.149224148134044, "learning_rate": 7.268688479206694e-06, "loss": 0.2767, "step": 3798 }, { "epoch": 0.3697323600973236, "grad_norm": 1.3675830377895282, "learning_rate": 7.267283798941038e-06, "loss": 0.4495, "step": 3799 }, { "epoch": 0.36982968369829683, "grad_norm": 1.4280450276827679, "learning_rate": 7.265878893373996e-06, "loss": 0.3899, "step": 3800 }, { "epoch": 0.3699270072992701, "grad_norm": 1.49782627276864, "learning_rate": 7.264473762645178e-06, "loss": 0.4774, "step": 3801 }, { "epoch": 0.37002433090024334, "grad_norm": 1.3952118310784345, "learning_rate": 7.263068406894209e-06, "loss": 0.5262, "step": 3802 }, { "epoch": 0.37012165450121653, "grad_norm": 1.05962510209006, "learning_rate": 7.261662826260741e-06, "loss": 0.285, "step": 3803 }, { "epoch": 0.3702189781021898, "grad_norm": 1.1322437831904242, "learning_rate": 7.260257020884448e-06, "loss": 0.3328, "step": 3804 }, { "epoch": 0.37031630170316304, "grad_norm": 1.578419195268038, "learning_rate": 7.2588509909050254e-06, "loss": 0.6624, "step": 3805 }, { "epoch": 0.37041362530413624, "grad_norm": 1.6753162370187171, "learning_rate": 7.257444736462193e-06, "loss": 0.6607, "step": 3806 }, { "epoch": 0.3705109489051095, "grad_norm": 1.2155863954352035, "learning_rate": 7.2560382576956875e-06, "loss": 0.385, "step": 3807 }, { "epoch": 0.37060827250608275, "grad_norm": 1.3086656237549659, "learning_rate": 7.254631554745275e-06, "loss": 0.3251, "step": 3808 }, { "epoch": 0.37070559610705595, "grad_norm": 1.6932808088932305, "learning_rate": 7.253224627750738e-06, "loss": 0.5078, "step": 3809 }, { "epoch": 0.3708029197080292, "grad_norm": 1.140327832951829, "learning_rate": 7.251817476851886e-06, "loss": 0.3278, "step": 3810 }, { "epoch": 0.37090024330900245, "grad_norm": 1.2897084785014141, "learning_rate": 7.2504101021885475e-06, "loss": 0.3223, "step": 3811 }, { "epoch": 0.37099756690997565, "grad_norm": 1.4467933285693058, "learning_rate": 7.249002503900573e-06, "loss": 0.3934, "step": 3812 }, { "epoch": 0.3710948905109489, "grad_norm": 1.208314429606629, "learning_rate": 7.2475946821278374e-06, "loss": 0.4511, "step": 3813 }, { "epoch": 0.37119221411192216, "grad_norm": 1.3518646431718864, "learning_rate": 7.2461866370102354e-06, "loss": 0.4939, "step": 3814 }, { "epoch": 0.37128953771289536, "grad_norm": 1.360244692009117, "learning_rate": 7.244778368687688e-06, "loss": 0.3937, "step": 3815 }, { "epoch": 0.3713868613138686, "grad_norm": 1.3528197069652437, "learning_rate": 7.243369877300135e-06, "loss": 0.3492, "step": 3816 }, { "epoch": 0.37148418491484186, "grad_norm": 1.2751734783858382, "learning_rate": 7.2419611629875386e-06, "loss": 0.4052, "step": 3817 }, { "epoch": 0.37158150851581506, "grad_norm": 1.2580260101504126, "learning_rate": 7.240552225889882e-06, "loss": 0.3386, "step": 3818 }, { "epoch": 0.3716788321167883, "grad_norm": 1.3674139216806946, "learning_rate": 7.239143066147174e-06, "loss": 0.2891, "step": 3819 }, { "epoch": 0.37177615571776157, "grad_norm": 1.0530572795884554, "learning_rate": 7.237733683899444e-06, "loss": 0.2657, "step": 3820 }, { "epoch": 0.37187347931873477, "grad_norm": 2.205291656616187, "learning_rate": 7.236324079286742e-06, "loss": 0.2303, "step": 3821 }, { "epoch": 0.371970802919708, "grad_norm": 1.1572222874558429, "learning_rate": 7.234914252449141e-06, "loss": 0.2307, "step": 3822 }, { "epoch": 0.3720681265206813, "grad_norm": 1.3550158356870847, "learning_rate": 7.233504203526738e-06, "loss": 0.4026, "step": 3823 }, { "epoch": 0.37216545012165453, "grad_norm": 1.1916885791889118, "learning_rate": 7.232093932659648e-06, "loss": 0.384, "step": 3824 }, { "epoch": 0.3722627737226277, "grad_norm": 1.3181641384661131, "learning_rate": 7.230683439988013e-06, "loss": 0.3978, "step": 3825 }, { "epoch": 0.372360097323601, "grad_norm": 1.4449003714564748, "learning_rate": 7.229272725651995e-06, "loss": 0.4663, "step": 3826 }, { "epoch": 0.37245742092457423, "grad_norm": 1.110719215634453, "learning_rate": 7.2278617897917734e-06, "loss": 0.3073, "step": 3827 }, { "epoch": 0.37255474452554743, "grad_norm": 1.3075317628405025, "learning_rate": 7.226450632547558e-06, "loss": 0.341, "step": 3828 }, { "epoch": 0.3726520681265207, "grad_norm": 1.490372677606144, "learning_rate": 7.225039254059574e-06, "loss": 0.483, "step": 3829 }, { "epoch": 0.37274939172749394, "grad_norm": 1.3137386471795909, "learning_rate": 7.223627654468072e-06, "loss": 0.3739, "step": 3830 }, { "epoch": 0.37284671532846714, "grad_norm": 1.242608245033651, "learning_rate": 7.2222158339133245e-06, "loss": 0.2848, "step": 3831 }, { "epoch": 0.3729440389294404, "grad_norm": 1.4129798917912184, "learning_rate": 7.220803792535621e-06, "loss": 0.3657, "step": 3832 }, { "epoch": 0.37304136253041364, "grad_norm": 1.7023996330847022, "learning_rate": 7.2193915304752815e-06, "loss": 0.601, "step": 3833 }, { "epoch": 0.37313868613138684, "grad_norm": 1.587053329225552, "learning_rate": 7.2179790478726405e-06, "loss": 0.3858, "step": 3834 }, { "epoch": 0.3732360097323601, "grad_norm": 1.50243862921725, "learning_rate": 7.216566344868059e-06, "loss": 0.3792, "step": 3835 }, { "epoch": 0.37333333333333335, "grad_norm": 1.2996422990779706, "learning_rate": 7.215153421601917e-06, "loss": 0.3695, "step": 3836 }, { "epoch": 0.37343065693430655, "grad_norm": 1.2661927915794233, "learning_rate": 7.2137402782146185e-06, "loss": 0.2922, "step": 3837 }, { "epoch": 0.3735279805352798, "grad_norm": 1.342404468128034, "learning_rate": 7.212326914846587e-06, "loss": 0.3832, "step": 3838 }, { "epoch": 0.37362530413625306, "grad_norm": 1.3085065670293223, "learning_rate": 7.2109133316382716e-06, "loss": 0.3435, "step": 3839 }, { "epoch": 0.37372262773722625, "grad_norm": 1.4136536570943345, "learning_rate": 7.209499528730138e-06, "loss": 0.3996, "step": 3840 }, { "epoch": 0.3738199513381995, "grad_norm": 1.3042560709248439, "learning_rate": 7.208085506262679e-06, "loss": 0.3547, "step": 3841 }, { "epoch": 0.37391727493917276, "grad_norm": 1.1956805990066928, "learning_rate": 7.206671264376406e-06, "loss": 0.3349, "step": 3842 }, { "epoch": 0.37401459854014596, "grad_norm": 1.3725427866174449, "learning_rate": 7.205256803211852e-06, "loss": 0.3136, "step": 3843 }, { "epoch": 0.3741119221411192, "grad_norm": 1.1819946263218575, "learning_rate": 7.203842122909576e-06, "loss": 0.3854, "step": 3844 }, { "epoch": 0.37420924574209247, "grad_norm": 1.1749292879944346, "learning_rate": 7.202427223610153e-06, "loss": 0.2567, "step": 3845 }, { "epoch": 0.3743065693430657, "grad_norm": 1.6177760725542194, "learning_rate": 7.201012105454181e-06, "loss": 0.6999, "step": 3846 }, { "epoch": 0.3744038929440389, "grad_norm": 1.5266511327183672, "learning_rate": 7.199596768582284e-06, "loss": 0.5089, "step": 3847 }, { "epoch": 0.37450121654501217, "grad_norm": 1.351768037018499, "learning_rate": 7.198181213135107e-06, "loss": 0.3024, "step": 3848 }, { "epoch": 0.3745985401459854, "grad_norm": 2.0647138793608883, "learning_rate": 7.19676543925331e-06, "loss": 0.4637, "step": 3849 }, { "epoch": 0.3746958637469586, "grad_norm": 1.0901772593808003, "learning_rate": 7.19534944707758e-06, "loss": 0.3197, "step": 3850 }, { "epoch": 0.3747931873479319, "grad_norm": 1.5563466788905995, "learning_rate": 7.193933236748627e-06, "loss": 0.4692, "step": 3851 }, { "epoch": 0.37489051094890513, "grad_norm": 1.6383898814090063, "learning_rate": 7.192516808407179e-06, "loss": 0.4814, "step": 3852 }, { "epoch": 0.37498783454987833, "grad_norm": 1.234160440846945, "learning_rate": 7.191100162193989e-06, "loss": 0.4099, "step": 3853 }, { "epoch": 0.3750851581508516, "grad_norm": 1.5270305810050784, "learning_rate": 7.189683298249829e-06, "loss": 0.553, "step": 3854 }, { "epoch": 0.37518248175182484, "grad_norm": 1.4979140559754238, "learning_rate": 7.1882662167154935e-06, "loss": 0.2972, "step": 3855 }, { "epoch": 0.37527980535279803, "grad_norm": 1.4793577831895819, "learning_rate": 7.186848917731799e-06, "loss": 0.517, "step": 3856 }, { "epoch": 0.3753771289537713, "grad_norm": 1.1834990208552847, "learning_rate": 7.1854314014395836e-06, "loss": 0.2198, "step": 3857 }, { "epoch": 0.37547445255474454, "grad_norm": 1.4192969456874913, "learning_rate": 7.184013667979707e-06, "loss": 0.4349, "step": 3858 }, { "epoch": 0.37557177615571774, "grad_norm": 1.411758906981302, "learning_rate": 7.1825957174930495e-06, "loss": 0.6343, "step": 3859 }, { "epoch": 0.375669099756691, "grad_norm": 1.4732647759178685, "learning_rate": 7.181177550120514e-06, "loss": 0.4049, "step": 3860 }, { "epoch": 0.37576642335766425, "grad_norm": 1.3463082468649135, "learning_rate": 7.1797591660030245e-06, "loss": 0.4196, "step": 3861 }, { "epoch": 0.37586374695863745, "grad_norm": 1.2161261851692835, "learning_rate": 7.178340565281527e-06, "loss": 0.3326, "step": 3862 }, { "epoch": 0.3759610705596107, "grad_norm": 1.3015426377776436, "learning_rate": 7.176921748096987e-06, "loss": 0.3816, "step": 3863 }, { "epoch": 0.37605839416058395, "grad_norm": 1.2745689561042768, "learning_rate": 7.175502714590398e-06, "loss": 0.365, "step": 3864 }, { "epoch": 0.37615571776155715, "grad_norm": 1.3894630463948126, "learning_rate": 7.174083464902765e-06, "loss": 0.5022, "step": 3865 }, { "epoch": 0.3762530413625304, "grad_norm": 1.6621696201258571, "learning_rate": 7.172663999175123e-06, "loss": 0.6661, "step": 3866 }, { "epoch": 0.37635036496350366, "grad_norm": 1.6093508524147906, "learning_rate": 7.171244317548522e-06, "loss": 0.2752, "step": 3867 }, { "epoch": 0.3764476885644769, "grad_norm": 1.2470356123121078, "learning_rate": 7.16982442016404e-06, "loss": 0.2922, "step": 3868 }, { "epoch": 0.3765450121654501, "grad_norm": 1.4271789388815392, "learning_rate": 7.168404307162773e-06, "loss": 0.3537, "step": 3869 }, { "epoch": 0.37664233576642336, "grad_norm": 1.2732277445845157, "learning_rate": 7.166983978685835e-06, "loss": 0.2861, "step": 3870 }, { "epoch": 0.3767396593673966, "grad_norm": 1.5040576412741455, "learning_rate": 7.165563434874367e-06, "loss": 0.5636, "step": 3871 }, { "epoch": 0.3768369829683698, "grad_norm": 1.305952270015343, "learning_rate": 7.164142675869531e-06, "loss": 0.3189, "step": 3872 }, { "epoch": 0.37693430656934307, "grad_norm": 1.297847888444762, "learning_rate": 7.162721701812506e-06, "loss": 0.2904, "step": 3873 }, { "epoch": 0.3770316301703163, "grad_norm": 1.4659816543867576, "learning_rate": 7.161300512844496e-06, "loss": 0.2955, "step": 3874 }, { "epoch": 0.3771289537712895, "grad_norm": 1.2035904516565876, "learning_rate": 7.159879109106726e-06, "loss": 0.2557, "step": 3875 }, { "epoch": 0.3772262773722628, "grad_norm": 1.598650858223209, "learning_rate": 7.158457490740442e-06, "loss": 0.7335, "step": 3876 }, { "epoch": 0.37732360097323603, "grad_norm": 1.2305628180498833, "learning_rate": 7.157035657886911e-06, "loss": 0.2654, "step": 3877 }, { "epoch": 0.3774209245742092, "grad_norm": 1.709426282151801, "learning_rate": 7.1556136106874195e-06, "loss": 0.4445, "step": 3878 }, { "epoch": 0.3775182481751825, "grad_norm": 1.5920181588666038, "learning_rate": 7.154191349283278e-06, "loss": 0.4233, "step": 3879 }, { "epoch": 0.37761557177615573, "grad_norm": 1.4519282509492475, "learning_rate": 7.152768873815819e-06, "loss": 0.4399, "step": 3880 }, { "epoch": 0.37771289537712893, "grad_norm": 1.118109262781078, "learning_rate": 7.151346184426394e-06, "loss": 0.2138, "step": 3881 }, { "epoch": 0.3778102189781022, "grad_norm": 1.233018443573734, "learning_rate": 7.1499232812563765e-06, "loss": 0.266, "step": 3882 }, { "epoch": 0.37790754257907544, "grad_norm": 1.4646778063558124, "learning_rate": 7.148500164447159e-06, "loss": 0.4118, "step": 3883 }, { "epoch": 0.37800486618004864, "grad_norm": 1.1142467929519195, "learning_rate": 7.147076834140163e-06, "loss": 0.3422, "step": 3884 }, { "epoch": 0.3781021897810219, "grad_norm": 1.2249131016174075, "learning_rate": 7.145653290476821e-06, "loss": 0.3973, "step": 3885 }, { "epoch": 0.37819951338199514, "grad_norm": 1.3075094712827902, "learning_rate": 7.144229533598593e-06, "loss": 0.4029, "step": 3886 }, { "epoch": 0.37829683698296834, "grad_norm": 1.2831548162044333, "learning_rate": 7.142805563646957e-06, "loss": 0.3863, "step": 3887 }, { "epoch": 0.3783941605839416, "grad_norm": 0.8015397364189331, "learning_rate": 7.1413813807634144e-06, "loss": 0.1929, "step": 3888 }, { "epoch": 0.37849148418491485, "grad_norm": 2.1960941972871186, "learning_rate": 7.1399569850894886e-06, "loss": 0.4767, "step": 3889 }, { "epoch": 0.3785888077858881, "grad_norm": 1.4481987255363193, "learning_rate": 7.138532376766722e-06, "loss": 0.4454, "step": 3890 }, { "epoch": 0.3786861313868613, "grad_norm": 0.951708820286833, "learning_rate": 7.13710755593668e-06, "loss": 0.186, "step": 3891 }, { "epoch": 0.37878345498783456, "grad_norm": 1.6714364305481455, "learning_rate": 7.1356825227409455e-06, "loss": 0.6873, "step": 3892 }, { "epoch": 0.3788807785888078, "grad_norm": 2.6374385953146238, "learning_rate": 7.134257277321126e-06, "loss": 0.3606, "step": 3893 }, { "epoch": 0.378978102189781, "grad_norm": 1.326824044728447, "learning_rate": 7.13283181981885e-06, "loss": 0.2763, "step": 3894 }, { "epoch": 0.37907542579075426, "grad_norm": 1.3996167884606898, "learning_rate": 7.131406150375764e-06, "loss": 0.5331, "step": 3895 }, { "epoch": 0.3791727493917275, "grad_norm": 1.2228137027647799, "learning_rate": 7.129980269133539e-06, "loss": 0.27, "step": 3896 }, { "epoch": 0.3792700729927007, "grad_norm": 1.4657212540958298, "learning_rate": 7.128554176233865e-06, "loss": 0.3137, "step": 3897 }, { "epoch": 0.37936739659367397, "grad_norm": 1.9918813432796558, "learning_rate": 7.127127871818455e-06, "loss": 0.3948, "step": 3898 }, { "epoch": 0.3794647201946472, "grad_norm": 1.8850475673593161, "learning_rate": 7.12570135602904e-06, "loss": 0.775, "step": 3899 }, { "epoch": 0.3795620437956204, "grad_norm": 1.6679949077189995, "learning_rate": 7.124274629007375e-06, "loss": 0.6015, "step": 3900 }, { "epoch": 0.37965936739659367, "grad_norm": 1.2107596482139231, "learning_rate": 7.122847690895235e-06, "loss": 0.2573, "step": 3901 }, { "epoch": 0.3797566909975669, "grad_norm": 1.352463967342193, "learning_rate": 7.1214205418344155e-06, "loss": 0.428, "step": 3902 }, { "epoch": 0.3798540145985401, "grad_norm": 1.6246032230437741, "learning_rate": 7.1199931819667316e-06, "loss": 0.5207, "step": 3903 }, { "epoch": 0.3799513381995134, "grad_norm": 1.4207183828680439, "learning_rate": 7.118565611434023e-06, "loss": 0.3472, "step": 3904 }, { "epoch": 0.38004866180048663, "grad_norm": 1.1713460109772222, "learning_rate": 7.117137830378147e-06, "loss": 0.3043, "step": 3905 }, { "epoch": 0.38014598540145983, "grad_norm": 1.4955277530157316, "learning_rate": 7.115709838940983e-06, "loss": 0.5889, "step": 3906 }, { "epoch": 0.3802433090024331, "grad_norm": 1.4020682146561894, "learning_rate": 7.114281637264433e-06, "loss": 0.5088, "step": 3907 }, { "epoch": 0.38034063260340634, "grad_norm": 1.547124360918337, "learning_rate": 7.112853225490417e-06, "loss": 0.3316, "step": 3908 }, { "epoch": 0.38043795620437953, "grad_norm": 1.2645380509557882, "learning_rate": 7.111424603760877e-06, "loss": 0.4013, "step": 3909 }, { "epoch": 0.3805352798053528, "grad_norm": 1.7019140855865837, "learning_rate": 7.109995772217776e-06, "loss": 0.6348, "step": 3910 }, { "epoch": 0.38063260340632604, "grad_norm": 1.304725235261976, "learning_rate": 7.108566731003099e-06, "loss": 0.4496, "step": 3911 }, { "epoch": 0.3807299270072993, "grad_norm": 1.324816183619312, "learning_rate": 7.1071374802588496e-06, "loss": 0.3335, "step": 3912 }, { "epoch": 0.3808272506082725, "grad_norm": 1.3880021724436575, "learning_rate": 7.1057080201270535e-06, "loss": 0.46, "step": 3913 }, { "epoch": 0.38092457420924575, "grad_norm": 1.5324573817091165, "learning_rate": 7.104278350749757e-06, "loss": 0.4837, "step": 3914 }, { "epoch": 0.381021897810219, "grad_norm": 1.415341468021005, "learning_rate": 7.1028484722690275e-06, "loss": 0.2941, "step": 3915 }, { "epoch": 0.3811192214111922, "grad_norm": 1.2327883216411208, "learning_rate": 7.101418384826953e-06, "loss": 0.4483, "step": 3916 }, { "epoch": 0.38121654501216545, "grad_norm": 1.4959339823461342, "learning_rate": 7.099988088565642e-06, "loss": 0.4297, "step": 3917 }, { "epoch": 0.3813138686131387, "grad_norm": 1.5384484629808992, "learning_rate": 7.098557583627224e-06, "loss": 0.4326, "step": 3918 }, { "epoch": 0.3814111922141119, "grad_norm": 1.6679471659994731, "learning_rate": 7.097126870153849e-06, "loss": 0.5069, "step": 3919 }, { "epoch": 0.38150851581508516, "grad_norm": 1.45117080708974, "learning_rate": 7.095695948287686e-06, "loss": 0.3661, "step": 3920 }, { "epoch": 0.3816058394160584, "grad_norm": 1.4481947743840338, "learning_rate": 7.094264818170931e-06, "loss": 0.3961, "step": 3921 }, { "epoch": 0.3817031630170316, "grad_norm": 1.348596217427556, "learning_rate": 7.092833479945793e-06, "loss": 0.4029, "step": 3922 }, { "epoch": 0.38180048661800486, "grad_norm": 1.3981471692690721, "learning_rate": 7.091401933754507e-06, "loss": 0.3814, "step": 3923 }, { "epoch": 0.3818978102189781, "grad_norm": 1.8099756090617796, "learning_rate": 7.089970179739323e-06, "loss": 0.3998, "step": 3924 }, { "epoch": 0.3819951338199513, "grad_norm": 1.5103246288100645, "learning_rate": 7.088538218042519e-06, "loss": 0.5697, "step": 3925 }, { "epoch": 0.38209245742092457, "grad_norm": 1.1477205283158238, "learning_rate": 7.087106048806388e-06, "loss": 0.3023, "step": 3926 }, { "epoch": 0.3821897810218978, "grad_norm": 1.2535758370464407, "learning_rate": 7.085673672173247e-06, "loss": 0.3408, "step": 3927 }, { "epoch": 0.382287104622871, "grad_norm": 1.1952324136071744, "learning_rate": 7.0842410882854305e-06, "loss": 0.3339, "step": 3928 }, { "epoch": 0.3823844282238443, "grad_norm": 1.2651491565748119, "learning_rate": 7.082808297285296e-06, "loss": 0.3817, "step": 3929 }, { "epoch": 0.38248175182481753, "grad_norm": 1.5429938591954184, "learning_rate": 7.081375299315221e-06, "loss": 0.5274, "step": 3930 }, { "epoch": 0.3825790754257907, "grad_norm": 1.3843509715042235, "learning_rate": 7.0799420945176026e-06, "loss": 0.4354, "step": 3931 }, { "epoch": 0.382676399026764, "grad_norm": 1.2382754737887756, "learning_rate": 7.078508683034862e-06, "loss": 0.4135, "step": 3932 }, { "epoch": 0.38277372262773723, "grad_norm": 1.659344068335914, "learning_rate": 7.0770750650094335e-06, "loss": 0.357, "step": 3933 }, { "epoch": 0.3828710462287105, "grad_norm": 1.2935879886930641, "learning_rate": 7.0756412405837795e-06, "loss": 0.3475, "step": 3934 }, { "epoch": 0.3829683698296837, "grad_norm": 1.279920120428988, "learning_rate": 7.07420720990038e-06, "loss": 0.2811, "step": 3935 }, { "epoch": 0.38306569343065694, "grad_norm": 1.5715982667528452, "learning_rate": 7.072772973101735e-06, "loss": 0.2954, "step": 3936 }, { "epoch": 0.3831630170316302, "grad_norm": 1.4996379574324572, "learning_rate": 7.071338530330365e-06, "loss": 0.3162, "step": 3937 }, { "epoch": 0.3832603406326034, "grad_norm": 1.323656350948574, "learning_rate": 7.069903881728815e-06, "loss": 0.4149, "step": 3938 }, { "epoch": 0.38335766423357664, "grad_norm": 1.5522008591091048, "learning_rate": 7.068469027439642e-06, "loss": 0.5585, "step": 3939 }, { "epoch": 0.3834549878345499, "grad_norm": 1.6876876161618577, "learning_rate": 7.06703396760543e-06, "loss": 0.3946, "step": 3940 }, { "epoch": 0.3835523114355231, "grad_norm": 1.5052911896213828, "learning_rate": 7.065598702368782e-06, "loss": 0.5529, "step": 3941 }, { "epoch": 0.38364963503649635, "grad_norm": 1.7507247786327806, "learning_rate": 7.0641632318723205e-06, "loss": 0.6298, "step": 3942 }, { "epoch": 0.3837469586374696, "grad_norm": 1.295827327522188, "learning_rate": 7.062727556258693e-06, "loss": 0.3517, "step": 3943 }, { "epoch": 0.3838442822384428, "grad_norm": 1.037020059823786, "learning_rate": 7.061291675670557e-06, "loss": 0.2667, "step": 3944 }, { "epoch": 0.38394160583941606, "grad_norm": 1.4712371869692, "learning_rate": 7.059855590250604e-06, "loss": 0.4601, "step": 3945 }, { "epoch": 0.3840389294403893, "grad_norm": 1.4848893729981114, "learning_rate": 7.058419300141531e-06, "loss": 0.458, "step": 3946 }, { "epoch": 0.3841362530413625, "grad_norm": 1.3674844473838341, "learning_rate": 7.056982805486069e-06, "loss": 0.4278, "step": 3947 }, { "epoch": 0.38423357664233576, "grad_norm": 1.6438745197663538, "learning_rate": 7.055546106426961e-06, "loss": 0.6002, "step": 3948 }, { "epoch": 0.384330900243309, "grad_norm": 1.2362894591542046, "learning_rate": 7.054109203106974e-06, "loss": 0.3796, "step": 3949 }, { "epoch": 0.3844282238442822, "grad_norm": 1.6016145279039653, "learning_rate": 7.052672095668891e-06, "loss": 0.4956, "step": 3950 }, { "epoch": 0.38452554744525547, "grad_norm": 1.578296991928203, "learning_rate": 7.0512347842555205e-06, "loss": 0.3424, "step": 3951 }, { "epoch": 0.3846228710462287, "grad_norm": 1.1907848300937582, "learning_rate": 7.049797269009689e-06, "loss": 0.361, "step": 3952 }, { "epoch": 0.384720194647202, "grad_norm": 1.4905027295394417, "learning_rate": 7.048359550074244e-06, "loss": 0.4279, "step": 3953 }, { "epoch": 0.38481751824817517, "grad_norm": 1.4651231016184674, "learning_rate": 7.046921627592051e-06, "loss": 0.4622, "step": 3954 }, { "epoch": 0.3849148418491484, "grad_norm": 1.3954586503570294, "learning_rate": 7.045483501705997e-06, "loss": 0.2737, "step": 3955 }, { "epoch": 0.3850121654501217, "grad_norm": 1.4949563585883912, "learning_rate": 7.044045172558991e-06, "loss": 0.3092, "step": 3956 }, { "epoch": 0.3851094890510949, "grad_norm": 1.3655729515230581, "learning_rate": 7.042606640293958e-06, "loss": 0.4943, "step": 3957 }, { "epoch": 0.38520681265206813, "grad_norm": 1.350946010163059, "learning_rate": 7.04116790505385e-06, "loss": 0.5314, "step": 3958 }, { "epoch": 0.3853041362530414, "grad_norm": 1.3729520632050074, "learning_rate": 7.039728966981632e-06, "loss": 0.4056, "step": 3959 }, { "epoch": 0.3854014598540146, "grad_norm": 1.4800883184320073, "learning_rate": 7.038289826220292e-06, "loss": 0.4511, "step": 3960 }, { "epoch": 0.38549878345498784, "grad_norm": 1.4839457644194014, "learning_rate": 7.036850482912841e-06, "loss": 0.5634, "step": 3961 }, { "epoch": 0.3855961070559611, "grad_norm": 1.4722550146581384, "learning_rate": 7.035410937202303e-06, "loss": 0.4885, "step": 3962 }, { "epoch": 0.3856934306569343, "grad_norm": 1.385565264579537, "learning_rate": 7.033971189231731e-06, "loss": 0.4708, "step": 3963 }, { "epoch": 0.38579075425790754, "grad_norm": 1.4522732114770711, "learning_rate": 7.032531239144192e-06, "loss": 0.5378, "step": 3964 }, { "epoch": 0.3858880778588808, "grad_norm": 1.14748867569532, "learning_rate": 7.031091087082773e-06, "loss": 0.3676, "step": 3965 }, { "epoch": 0.385985401459854, "grad_norm": 1.3321032015253773, "learning_rate": 7.029650733190585e-06, "loss": 0.4896, "step": 3966 }, { "epoch": 0.38608272506082725, "grad_norm": 1.2135132498408774, "learning_rate": 7.028210177610755e-06, "loss": 0.3284, "step": 3967 }, { "epoch": 0.3861800486618005, "grad_norm": 1.5809800411366401, "learning_rate": 7.026769420486435e-06, "loss": 0.5651, "step": 3968 }, { "epoch": 0.3862773722627737, "grad_norm": 1.2932935231921396, "learning_rate": 7.025328461960791e-06, "loss": 0.3895, "step": 3969 }, { "epoch": 0.38637469586374695, "grad_norm": 1.1609928698062038, "learning_rate": 7.023887302177013e-06, "loss": 0.3102, "step": 3970 }, { "epoch": 0.3864720194647202, "grad_norm": 1.3129146011919008, "learning_rate": 7.022445941278308e-06, "loss": 0.3229, "step": 3971 }, { "epoch": 0.3865693430656934, "grad_norm": 1.5073708000931252, "learning_rate": 7.02100437940791e-06, "loss": 0.4405, "step": 3972 }, { "epoch": 0.38666666666666666, "grad_norm": 1.525639512470862, "learning_rate": 7.019562616709061e-06, "loss": 0.4644, "step": 3973 }, { "epoch": 0.3867639902676399, "grad_norm": 1.6237386062609962, "learning_rate": 7.018120653325037e-06, "loss": 0.6601, "step": 3974 }, { "epoch": 0.38686131386861317, "grad_norm": 1.345911062533185, "learning_rate": 7.016678489399121e-06, "loss": 0.5065, "step": 3975 }, { "epoch": 0.38695863746958636, "grad_norm": 1.514524079658759, "learning_rate": 7.015236125074626e-06, "loss": 0.2811, "step": 3976 }, { "epoch": 0.3870559610705596, "grad_norm": 1.4135796159763112, "learning_rate": 7.013793560494877e-06, "loss": 0.4894, "step": 3977 }, { "epoch": 0.38715328467153287, "grad_norm": 1.4104178653563566, "learning_rate": 7.012350795803223e-06, "loss": 0.5016, "step": 3978 }, { "epoch": 0.38725060827250607, "grad_norm": 1.3045531335042206, "learning_rate": 7.010907831143035e-06, "loss": 0.2089, "step": 3979 }, { "epoch": 0.3873479318734793, "grad_norm": 1.4042038760740407, "learning_rate": 7.009464666657701e-06, "loss": 0.3769, "step": 3980 }, { "epoch": 0.3874452554744526, "grad_norm": 1.479965245122337, "learning_rate": 7.008021302490626e-06, "loss": 0.4625, "step": 3981 }, { "epoch": 0.3875425790754258, "grad_norm": 1.4175283205139955, "learning_rate": 7.0065777387852405e-06, "loss": 0.3618, "step": 3982 }, { "epoch": 0.38763990267639903, "grad_norm": 1.5162882975673595, "learning_rate": 7.005133975684992e-06, "loss": 0.4722, "step": 3983 }, { "epoch": 0.3877372262773723, "grad_norm": 1.4933222106252568, "learning_rate": 7.003690013333348e-06, "loss": 0.2983, "step": 3984 }, { "epoch": 0.3878345498783455, "grad_norm": 1.3557382804846623, "learning_rate": 7.002245851873794e-06, "loss": 0.5318, "step": 3985 }, { "epoch": 0.38793187347931873, "grad_norm": 1.167640461283426, "learning_rate": 7.000801491449843e-06, "loss": 0.2841, "step": 3986 }, { "epoch": 0.388029197080292, "grad_norm": 1.4266522816945901, "learning_rate": 6.9993569322050145e-06, "loss": 0.492, "step": 3987 }, { "epoch": 0.3881265206812652, "grad_norm": 1.4821564005694208, "learning_rate": 6.997912174282859e-06, "loss": 0.4676, "step": 3988 }, { "epoch": 0.38822384428223844, "grad_norm": 1.2497816848843788, "learning_rate": 6.996467217826944e-06, "loss": 0.3818, "step": 3989 }, { "epoch": 0.3883211678832117, "grad_norm": 1.3419552727592317, "learning_rate": 6.995022062980854e-06, "loss": 0.3393, "step": 3990 }, { "epoch": 0.3884184914841849, "grad_norm": 1.8154719749479167, "learning_rate": 6.993576709888196e-06, "loss": 0.4484, "step": 3991 }, { "epoch": 0.38851581508515814, "grad_norm": 1.351000409855417, "learning_rate": 6.992131158692594e-06, "loss": 0.4945, "step": 3992 }, { "epoch": 0.3886131386861314, "grad_norm": 1.6363889229872557, "learning_rate": 6.9906854095376946e-06, "loss": 0.5338, "step": 3993 }, { "epoch": 0.3887104622871046, "grad_norm": 1.7802129035237775, "learning_rate": 6.989239462567162e-06, "loss": 0.3984, "step": 3994 }, { "epoch": 0.38880778588807785, "grad_norm": 1.5793157183409696, "learning_rate": 6.987793317924683e-06, "loss": 0.3973, "step": 3995 }, { "epoch": 0.3889051094890511, "grad_norm": 1.5416565542490828, "learning_rate": 6.986346975753958e-06, "loss": 0.5045, "step": 3996 }, { "epoch": 0.38900243309002436, "grad_norm": 0.8915263517146853, "learning_rate": 6.984900436198715e-06, "loss": 0.1697, "step": 3997 }, { "epoch": 0.38909975669099756, "grad_norm": 1.5546582089832586, "learning_rate": 6.983453699402695e-06, "loss": 0.4891, "step": 3998 }, { "epoch": 0.3891970802919708, "grad_norm": 1.5087179519043437, "learning_rate": 6.9820067655096615e-06, "loss": 0.526, "step": 3999 }, { "epoch": 0.38929440389294406, "grad_norm": 1.5145233712620612, "learning_rate": 6.980559634663397e-06, "loss": 0.4787, "step": 4000 }, { "epoch": 0.38939172749391726, "grad_norm": 1.418350630690952, "learning_rate": 6.979112307007705e-06, "loss": 0.5409, "step": 4001 }, { "epoch": 0.3894890510948905, "grad_norm": 1.6433890174783798, "learning_rate": 6.977664782686406e-06, "loss": 0.4894, "step": 4002 }, { "epoch": 0.38958637469586377, "grad_norm": 1.1028189021006047, "learning_rate": 6.976217061843343e-06, "loss": 0.2523, "step": 4003 }, { "epoch": 0.38968369829683697, "grad_norm": 1.1910178630203534, "learning_rate": 6.974769144622374e-06, "loss": 0.261, "step": 4004 }, { "epoch": 0.3897810218978102, "grad_norm": 1.416321272912632, "learning_rate": 6.9733210311673826e-06, "loss": 0.3422, "step": 4005 }, { "epoch": 0.3898783454987835, "grad_norm": 1.1678284015343599, "learning_rate": 6.971872721622268e-06, "loss": 0.2577, "step": 4006 }, { "epoch": 0.38997566909975667, "grad_norm": 1.6751662328419257, "learning_rate": 6.970424216130949e-06, "loss": 0.4419, "step": 4007 }, { "epoch": 0.3900729927007299, "grad_norm": 1.3122627861372418, "learning_rate": 6.968975514837364e-06, "loss": 0.3431, "step": 4008 }, { "epoch": 0.3901703163017032, "grad_norm": 1.4908254908438412, "learning_rate": 6.967526617885471e-06, "loss": 0.5071, "step": 4009 }, { "epoch": 0.3902676399026764, "grad_norm": 1.273008168870797, "learning_rate": 6.966077525419249e-06, "loss": 0.3637, "step": 4010 }, { "epoch": 0.39036496350364963, "grad_norm": 1.8505049634947817, "learning_rate": 6.964628237582696e-06, "loss": 0.6389, "step": 4011 }, { "epoch": 0.3904622871046229, "grad_norm": 1.350943274678673, "learning_rate": 6.963178754519826e-06, "loss": 0.3458, "step": 4012 }, { "epoch": 0.3905596107055961, "grad_norm": 1.6230761069776574, "learning_rate": 6.961729076374679e-06, "loss": 0.5931, "step": 4013 }, { "epoch": 0.39065693430656934, "grad_norm": 1.4687261376573049, "learning_rate": 6.960279203291305e-06, "loss": 0.5103, "step": 4014 }, { "epoch": 0.3907542579075426, "grad_norm": 2.279060462648419, "learning_rate": 6.958829135413783e-06, "loss": 0.3421, "step": 4015 }, { "epoch": 0.3908515815085158, "grad_norm": 1.4453991838183962, "learning_rate": 6.957378872886205e-06, "loss": 0.4648, "step": 4016 }, { "epoch": 0.39094890510948904, "grad_norm": 1.2752498064098947, "learning_rate": 6.955928415852686e-06, "loss": 0.2475, "step": 4017 }, { "epoch": 0.3910462287104623, "grad_norm": 1.3162378944934356, "learning_rate": 6.954477764457359e-06, "loss": 0.5026, "step": 4018 }, { "epoch": 0.39114355231143555, "grad_norm": 1.4455647016550808, "learning_rate": 6.953026918844375e-06, "loss": 0.3693, "step": 4019 }, { "epoch": 0.39124087591240875, "grad_norm": 1.7197570624779743, "learning_rate": 6.951575879157904e-06, "loss": 0.5467, "step": 4020 }, { "epoch": 0.391338199513382, "grad_norm": 1.3683763838921565, "learning_rate": 6.950124645542139e-06, "loss": 0.3125, "step": 4021 }, { "epoch": 0.39143552311435525, "grad_norm": 1.3684386713615748, "learning_rate": 6.948673218141291e-06, "loss": 0.4659, "step": 4022 }, { "epoch": 0.39153284671532845, "grad_norm": 2.7578384354968497, "learning_rate": 6.947221597099585e-06, "loss": 0.4887, "step": 4023 }, { "epoch": 0.3916301703163017, "grad_norm": 1.331068524451763, "learning_rate": 6.945769782561273e-06, "loss": 0.3888, "step": 4024 }, { "epoch": 0.39172749391727496, "grad_norm": 1.7490396589403405, "learning_rate": 6.944317774670622e-06, "loss": 0.3748, "step": 4025 }, { "epoch": 0.39182481751824816, "grad_norm": 1.4248992281969828, "learning_rate": 6.942865573571919e-06, "loss": 0.3915, "step": 4026 }, { "epoch": 0.3919221411192214, "grad_norm": 1.485964355867862, "learning_rate": 6.941413179409468e-06, "loss": 0.3346, "step": 4027 }, { "epoch": 0.39201946472019467, "grad_norm": 1.5466928085770633, "learning_rate": 6.939960592327599e-06, "loss": 0.5374, "step": 4028 }, { "epoch": 0.39211678832116786, "grad_norm": 1.5883887378893773, "learning_rate": 6.938507812470652e-06, "loss": 0.5226, "step": 4029 }, { "epoch": 0.3922141119221411, "grad_norm": 2.786093191780367, "learning_rate": 6.937054839982993e-06, "loss": 0.2959, "step": 4030 }, { "epoch": 0.39231143552311437, "grad_norm": 1.3276365840641022, "learning_rate": 6.935601675009003e-06, "loss": 0.4711, "step": 4031 }, { "epoch": 0.39240875912408757, "grad_norm": 1.289716288913328, "learning_rate": 6.934148317693083e-06, "loss": 0.2954, "step": 4032 }, { "epoch": 0.3925060827250608, "grad_norm": 1.44676589060076, "learning_rate": 6.932694768179659e-06, "loss": 0.326, "step": 4033 }, { "epoch": 0.3926034063260341, "grad_norm": 1.6207833969674215, "learning_rate": 6.9312410266131665e-06, "loss": 0.4315, "step": 4034 }, { "epoch": 0.3927007299270073, "grad_norm": 1.5436789121746861, "learning_rate": 6.929787093138067e-06, "loss": 0.249, "step": 4035 }, { "epoch": 0.39279805352798053, "grad_norm": 1.3498642235046783, "learning_rate": 6.9283329678988375e-06, "loss": 0.4106, "step": 4036 }, { "epoch": 0.3928953771289538, "grad_norm": 1.3134123038417191, "learning_rate": 6.926878651039975e-06, "loss": 0.3761, "step": 4037 }, { "epoch": 0.392992700729927, "grad_norm": 1.4258818290455284, "learning_rate": 6.925424142705997e-06, "loss": 0.3464, "step": 4038 }, { "epoch": 0.39309002433090023, "grad_norm": 1.4508029884759852, "learning_rate": 6.92396944304144e-06, "loss": 0.3037, "step": 4039 }, { "epoch": 0.3931873479318735, "grad_norm": 2.156543435295907, "learning_rate": 6.922514552190856e-06, "loss": 0.6332, "step": 4040 }, { "epoch": 0.39328467153284674, "grad_norm": 1.6813909365209494, "learning_rate": 6.921059470298819e-06, "loss": 0.7023, "step": 4041 }, { "epoch": 0.39338199513381994, "grad_norm": 1.3183413808089122, "learning_rate": 6.91960419750992e-06, "loss": 0.4137, "step": 4042 }, { "epoch": 0.3934793187347932, "grad_norm": 1.626376595990447, "learning_rate": 6.918148733968774e-06, "loss": 0.5167, "step": 4043 }, { "epoch": 0.39357664233576645, "grad_norm": 1.3584619722516016, "learning_rate": 6.916693079820009e-06, "loss": 0.4243, "step": 4044 }, { "epoch": 0.39367396593673964, "grad_norm": 2.692105804461361, "learning_rate": 6.915237235208274e-06, "loss": 0.2159, "step": 4045 }, { "epoch": 0.3937712895377129, "grad_norm": 1.4379984779190689, "learning_rate": 6.913781200278239e-06, "loss": 0.3612, "step": 4046 }, { "epoch": 0.39386861313868615, "grad_norm": 1.2761654800240798, "learning_rate": 6.9123249751745866e-06, "loss": 0.3696, "step": 4047 }, { "epoch": 0.39396593673965935, "grad_norm": 1.0634612055178503, "learning_rate": 6.91086856004203e-06, "loss": 0.3053, "step": 4048 }, { "epoch": 0.3940632603406326, "grad_norm": 1.3282406027353941, "learning_rate": 6.90941195502529e-06, "loss": 0.3465, "step": 4049 }, { "epoch": 0.39416058394160586, "grad_norm": 1.1979553824540066, "learning_rate": 6.907955160269107e-06, "loss": 0.3624, "step": 4050 }, { "epoch": 0.39425790754257906, "grad_norm": 1.479713025623162, "learning_rate": 6.90649817591825e-06, "loss": 0.5207, "step": 4051 }, { "epoch": 0.3943552311435523, "grad_norm": 1.2910853380088252, "learning_rate": 6.905041002117494e-06, "loss": 0.3805, "step": 4052 }, { "epoch": 0.39445255474452556, "grad_norm": 1.322518756928142, "learning_rate": 6.903583639011647e-06, "loss": 0.3741, "step": 4053 }, { "epoch": 0.39454987834549876, "grad_norm": 1.388861062512862, "learning_rate": 6.902126086745521e-06, "loss": 0.3978, "step": 4054 }, { "epoch": 0.394647201946472, "grad_norm": 1.431056050315178, "learning_rate": 6.900668345463958e-06, "loss": 0.4779, "step": 4055 }, { "epoch": 0.39474452554744527, "grad_norm": 1.3089722954111018, "learning_rate": 6.8992104153118124e-06, "loss": 0.2481, "step": 4056 }, { "epoch": 0.39484184914841847, "grad_norm": 1.571119717801153, "learning_rate": 6.8977522964339596e-06, "loss": 0.422, "step": 4057 }, { "epoch": 0.3949391727493917, "grad_norm": 1.6272627408819231, "learning_rate": 6.896293988975297e-06, "loss": 0.5442, "step": 4058 }, { "epoch": 0.395036496350365, "grad_norm": 1.5163652637569427, "learning_rate": 6.894835493080733e-06, "loss": 0.4726, "step": 4059 }, { "epoch": 0.39513381995133817, "grad_norm": 2.0784976022874897, "learning_rate": 6.8933768088952025e-06, "loss": 0.4263, "step": 4060 }, { "epoch": 0.3952311435523114, "grad_norm": 1.7443301449253348, "learning_rate": 6.8919179365636546e-06, "loss": 0.3929, "step": 4061 }, { "epoch": 0.3953284671532847, "grad_norm": 1.7924502086241416, "learning_rate": 6.8904588762310586e-06, "loss": 0.5579, "step": 4062 }, { "epoch": 0.39542579075425793, "grad_norm": 1.0193876552673304, "learning_rate": 6.888999628042401e-06, "loss": 0.2416, "step": 4063 }, { "epoch": 0.39552311435523113, "grad_norm": 1.341170474322962, "learning_rate": 6.887540192142691e-06, "loss": 0.3074, "step": 4064 }, { "epoch": 0.3956204379562044, "grad_norm": 1.8133739469107961, "learning_rate": 6.88608056867695e-06, "loss": 0.6511, "step": 4065 }, { "epoch": 0.39571776155717764, "grad_norm": 1.6797513727862947, "learning_rate": 6.884620757790226e-06, "loss": 0.5998, "step": 4066 }, { "epoch": 0.39581508515815084, "grad_norm": 1.3474983454536655, "learning_rate": 6.883160759627577e-06, "loss": 0.4278, "step": 4067 }, { "epoch": 0.3959124087591241, "grad_norm": 1.2209023274964024, "learning_rate": 6.881700574334087e-06, "loss": 0.2868, "step": 4068 }, { "epoch": 0.39600973236009734, "grad_norm": 1.5229304325648796, "learning_rate": 6.880240202054854e-06, "loss": 0.4164, "step": 4069 }, { "epoch": 0.39610705596107054, "grad_norm": 1.3625612735094477, "learning_rate": 6.878779642934996e-06, "loss": 0.3048, "step": 4070 }, { "epoch": 0.3962043795620438, "grad_norm": 1.4190083205534945, "learning_rate": 6.8773188971196515e-06, "loss": 0.385, "step": 4071 }, { "epoch": 0.39630170316301705, "grad_norm": 1.3525245197492086, "learning_rate": 6.875857964753973e-06, "loss": 0.3608, "step": 4072 }, { "epoch": 0.39639902676399025, "grad_norm": 1.4522778082083179, "learning_rate": 6.874396845983134e-06, "loss": 0.5594, "step": 4073 }, { "epoch": 0.3964963503649635, "grad_norm": 1.4455429305856882, "learning_rate": 6.87293554095233e-06, "loss": 0.4933, "step": 4074 }, { "epoch": 0.39659367396593675, "grad_norm": 1.4604158337092326, "learning_rate": 6.871474049806771e-06, "loss": 0.5305, "step": 4075 }, { "epoch": 0.39669099756690995, "grad_norm": 1.3489977365676973, "learning_rate": 6.870012372691685e-06, "loss": 0.4778, "step": 4076 }, { "epoch": 0.3967883211678832, "grad_norm": 1.356695757623522, "learning_rate": 6.86855050975232e-06, "loss": 0.3774, "step": 4077 }, { "epoch": 0.39688564476885646, "grad_norm": 1.1716870983796823, "learning_rate": 6.867088461133941e-06, "loss": 0.3492, "step": 4078 }, { "epoch": 0.39698296836982966, "grad_norm": 1.478028392255781, "learning_rate": 6.865626226981834e-06, "loss": 0.4141, "step": 4079 }, { "epoch": 0.3970802919708029, "grad_norm": 1.239501688643302, "learning_rate": 6.864163807441304e-06, "loss": 0.3388, "step": 4080 }, { "epoch": 0.39717761557177617, "grad_norm": 1.5351372249288278, "learning_rate": 6.86270120265767e-06, "loss": 0.5215, "step": 4081 }, { "epoch": 0.3972749391727494, "grad_norm": 1.382749480627249, "learning_rate": 6.861238412776272e-06, "loss": 0.5118, "step": 4082 }, { "epoch": 0.3973722627737226, "grad_norm": 1.3595228443646563, "learning_rate": 6.8597754379424695e-06, "loss": 0.3972, "step": 4083 }, { "epoch": 0.39746958637469587, "grad_norm": 1.9244235129217742, "learning_rate": 6.858312278301638e-06, "loss": 0.4423, "step": 4084 }, { "epoch": 0.3975669099756691, "grad_norm": 1.4893887621731592, "learning_rate": 6.856848933999174e-06, "loss": 0.4281, "step": 4085 }, { "epoch": 0.3976642335766423, "grad_norm": 1.159936646507266, "learning_rate": 6.85538540518049e-06, "loss": 0.2303, "step": 4086 }, { "epoch": 0.3977615571776156, "grad_norm": 1.5864417866949991, "learning_rate": 6.853921691991018e-06, "loss": 0.301, "step": 4087 }, { "epoch": 0.39785888077858883, "grad_norm": 1.2139649969393456, "learning_rate": 6.852457794576207e-06, "loss": 0.3066, "step": 4088 }, { "epoch": 0.39795620437956203, "grad_norm": 1.4984653887972654, "learning_rate": 6.850993713081527e-06, "loss": 0.4157, "step": 4089 }, { "epoch": 0.3980535279805353, "grad_norm": 1.3362834103353325, "learning_rate": 6.8495294476524636e-06, "loss": 0.2316, "step": 4090 }, { "epoch": 0.39815085158150854, "grad_norm": 1.4160149515084606, "learning_rate": 6.848064998434523e-06, "loss": 0.4297, "step": 4091 }, { "epoch": 0.39824817518248173, "grad_norm": 1.4266675728649458, "learning_rate": 6.846600365573226e-06, "loss": 0.3893, "step": 4092 }, { "epoch": 0.398345498783455, "grad_norm": 1.4688794505110578, "learning_rate": 6.845135549214117e-06, "loss": 0.2136, "step": 4093 }, { "epoch": 0.39844282238442824, "grad_norm": 1.3580044820287391, "learning_rate": 6.843670549502755e-06, "loss": 0.335, "step": 4094 }, { "epoch": 0.39854014598540144, "grad_norm": 1.4677889500126287, "learning_rate": 6.842205366584716e-06, "loss": 0.1751, "step": 4095 }, { "epoch": 0.3986374695863747, "grad_norm": 1.3678338838909818, "learning_rate": 6.840740000605598e-06, "loss": 0.4195, "step": 4096 }, { "epoch": 0.39873479318734795, "grad_norm": 1.611968238148494, "learning_rate": 6.8392744517110135e-06, "loss": 0.4716, "step": 4097 }, { "epoch": 0.39883211678832114, "grad_norm": 1.2798081635725236, "learning_rate": 6.837808720046598e-06, "loss": 0.2324, "step": 4098 }, { "epoch": 0.3989294403892944, "grad_norm": 1.2301645538697144, "learning_rate": 6.836342805758e-06, "loss": 0.3178, "step": 4099 }, { "epoch": 0.39902676399026765, "grad_norm": 1.3243492759809474, "learning_rate": 6.834876708990887e-06, "loss": 0.3202, "step": 4100 }, { "epoch": 0.39912408759124085, "grad_norm": 1.6002987225145617, "learning_rate": 6.833410429890948e-06, "loss": 0.3685, "step": 4101 }, { "epoch": 0.3992214111922141, "grad_norm": 1.3678479152711107, "learning_rate": 6.8319439686038905e-06, "loss": 0.4836, "step": 4102 }, { "epoch": 0.39931873479318736, "grad_norm": 1.4215810283225752, "learning_rate": 6.830477325275432e-06, "loss": 0.433, "step": 4103 }, { "epoch": 0.3994160583941606, "grad_norm": 1.2240968936921577, "learning_rate": 6.829010500051319e-06, "loss": 0.3181, "step": 4104 }, { "epoch": 0.3995133819951338, "grad_norm": 1.3093225983032648, "learning_rate": 6.8275434930773065e-06, "loss": 0.3464, "step": 4105 }, { "epoch": 0.39961070559610706, "grad_norm": 1.3776375009966235, "learning_rate": 6.826076304499174e-06, "loss": 0.4843, "step": 4106 }, { "epoch": 0.3997080291970803, "grad_norm": 1.2816280179506763, "learning_rate": 6.8246089344627174e-06, "loss": 0.3877, "step": 4107 }, { "epoch": 0.3998053527980535, "grad_norm": 1.739257469798359, "learning_rate": 6.823141383113748e-06, "loss": 0.5034, "step": 4108 }, { "epoch": 0.39990267639902677, "grad_norm": 1.433615286766659, "learning_rate": 6.8216736505981e-06, "loss": 0.456, "step": 4109 }, { "epoch": 0.4, "grad_norm": 1.352765027942841, "learning_rate": 6.820205737061621e-06, "loss": 0.4045, "step": 4110 }, { "epoch": 0.4000973236009732, "grad_norm": 1.5427269854846495, "learning_rate": 6.8187376426501795e-06, "loss": 0.5184, "step": 4111 }, { "epoch": 0.4001946472019465, "grad_norm": 1.3420595581039498, "learning_rate": 6.81726936750966e-06, "loss": 0.4387, "step": 4112 }, { "epoch": 0.4002919708029197, "grad_norm": 1.7226489838946462, "learning_rate": 6.815800911785968e-06, "loss": 0.6075, "step": 4113 }, { "epoch": 0.4003892944038929, "grad_norm": 1.4330949045341035, "learning_rate": 6.814332275625024e-06, "loss": 0.4566, "step": 4114 }, { "epoch": 0.4004866180048662, "grad_norm": 1.4735337129897166, "learning_rate": 6.812863459172765e-06, "loss": 0.3747, "step": 4115 }, { "epoch": 0.40058394160583943, "grad_norm": 1.1425351387480063, "learning_rate": 6.811394462575149e-06, "loss": 0.2628, "step": 4116 }, { "epoch": 0.40068126520681263, "grad_norm": 1.43999339592267, "learning_rate": 6.809925285978152e-06, "loss": 0.5026, "step": 4117 }, { "epoch": 0.4007785888077859, "grad_norm": 1.4172325219654425, "learning_rate": 6.808455929527768e-06, "loss": 0.4025, "step": 4118 }, { "epoch": 0.40087591240875914, "grad_norm": 1.7910927659189848, "learning_rate": 6.806986393370006e-06, "loss": 0.6098, "step": 4119 }, { "epoch": 0.40097323600973234, "grad_norm": 1.2075775786568461, "learning_rate": 6.805516677650896e-06, "loss": 0.3196, "step": 4120 } ], "logging_steps": 1.0, "max_steps": 10275, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 515, "total_flos": 527298366177280.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }